blob: 9f413175463d115fbbe0f692b145725f8059a44e [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 For PyString_FromStringAndSize(), the parameter the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
536 v = PyString_FromStringAndSize((char *)NULL,
537 recode_encoding ? 4*len:len);
538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
544#ifdef Py_USING_UNICODE
545 if (recode_encoding && (*s & 0x80)) {
546 PyObject *u, *w;
547 char *r;
548 const char* t;
549 int rn;
550 t = s;
551 /* Decode non-ASCII bytes as UTF-8. */
552 while (t < end && (*t & 0x80)) t++;
553 u = PyUnicode_DecodeUTF8(s, t - s, errors);
554 if(!u) goto failed;
555
556 /* Recode them in target encoding. */
557 w = PyUnicode_AsEncodedString(
558 u, recode_encoding, errors);
559 Py_DECREF(u);
560 if (!w) goto failed;
561
562 /* Append bytes to output buffer. */
563 r = PyString_AsString(w);
564 rn = PyString_Size(w);
565 memcpy(p, r, rn);
566 p += rn;
567 Py_DECREF(w);
568 s = t;
569 } else {
570 *p++ = *s++;
571 }
572#else
573 *p++ = *s++;
574#endif
575 continue;
576 }
577 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000578 if (s==end) {
579 PyErr_SetString(PyExc_ValueError,
580 "Trailing \\ in string");
581 goto failed;
582 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000583 switch (*s++) {
584 /* XXX This assumes ASCII! */
585 case '\n': break;
586 case '\\': *p++ = '\\'; break;
587 case '\'': *p++ = '\''; break;
588 case '\"': *p++ = '\"'; break;
589 case 'b': *p++ = '\b'; break;
590 case 'f': *p++ = '\014'; break; /* FF */
591 case 't': *p++ = '\t'; break;
592 case 'n': *p++ = '\n'; break;
593 case 'r': *p++ = '\r'; break;
594 case 'v': *p++ = '\013'; break; /* VT */
595 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
596 case '0': case '1': case '2': case '3':
597 case '4': case '5': case '6': case '7':
598 c = s[-1] - '0';
599 if ('0' <= *s && *s <= '7') {
600 c = (c<<3) + *s++ - '0';
601 if ('0' <= *s && *s <= '7')
602 c = (c<<3) + *s++ - '0';
603 }
604 *p++ = c;
605 break;
606 case 'x':
607 if (isxdigit(Py_CHARMASK(s[0]))
608 && isxdigit(Py_CHARMASK(s[1]))) {
609 unsigned int x = 0;
610 c = Py_CHARMASK(*s);
611 s++;
612 if (isdigit(c))
613 x = c - '0';
614 else if (islower(c))
615 x = 10 + c - 'a';
616 else
617 x = 10 + c - 'A';
618 x = x << 4;
619 c = Py_CHARMASK(*s);
620 s++;
621 if (isdigit(c))
622 x += c - '0';
623 else if (islower(c))
624 x += 10 + c - 'a';
625 else
626 x += 10 + c - 'A';
627 *p++ = x;
628 break;
629 }
630 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 PyErr_SetString(PyExc_ValueError,
632 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000633 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 }
635 if (strcmp(errors, "replace") == 0) {
636 *p++ = '?';
637 } else if (strcmp(errors, "ignore") == 0)
638 /* do nothing */;
639 else {
640 PyErr_Format(PyExc_ValueError,
641 "decoding error; "
642 "unknown error handling code: %.400s",
643 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000644 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000645 }
646#ifndef Py_USING_UNICODE
647 case 'u':
648 case 'U':
649 case 'N':
650 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "Unicode escapes not legal "
653 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656#endif
657 default:
658 *p++ = '\\';
659 *p++ = s[-1];
660 break;
661 }
662 }
663 _PyString_Resize(&v, (int)(p - buf));
664 return v;
665 failed:
666 Py_DECREF(v);
667 return NULL;
668}
669
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000670static int
671string_getsize(register PyObject *op)
672{
673 char *s;
674 int len;
675 if (PyString_AsStringAndSize(op, &s, &len))
676 return -1;
677 return len;
678}
679
680static /*const*/ char *
681string_getbuffer(register PyObject *op)
682{
683 char *s;
684 int len;
685 if (PyString_AsStringAndSize(op, &s, &len))
686 return NULL;
687 return s;
688}
689
Guido van Rossumd7047b31995-01-02 19:07:15 +0000690int
Fred Drakeba096332000-07-09 07:04:36 +0000691PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000692{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000693 if (!PyString_Check(op))
694 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000695 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696}
697
698/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000699PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000701 if (!PyString_Check(op))
702 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000703 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704}
705
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706int
707PyString_AsStringAndSize(register PyObject *obj,
708 register char **s,
709 register int *len)
710{
711 if (s == NULL) {
712 PyErr_BadInternalCall();
713 return -1;
714 }
715
716 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000717#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (PyUnicode_Check(obj)) {
719 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
720 if (obj == NULL)
721 return -1;
722 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000723 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000724#endif
725 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 PyErr_Format(PyExc_TypeError,
727 "expected string or Unicode object, "
728 "%.200s found", obj->ob_type->tp_name);
729 return -1;
730 }
731 }
732
733 *s = PyString_AS_STRING(obj);
734 if (len != NULL)
735 *len = PyString_GET_SIZE(obj);
736 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
737 PyErr_SetString(PyExc_TypeError,
738 "expected string without null bytes");
739 return -1;
740 }
741 return 0;
742}
743
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000744/* Methods */
745
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000746static int
Fred Drakeba096332000-07-09 07:04:36 +0000747string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748{
749 int i;
750 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000751 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000752
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000753 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000754 if (! PyString_CheckExact(op)) {
755 int ret;
756 /* A str subclass may have its own __str__ method. */
757 op = (PyStringObject *) PyObject_Str((PyObject *)op);
758 if (op == NULL)
759 return -1;
760 ret = string_print(op, fp, flags);
761 Py_DECREF(op);
762 return ret;
763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000764 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000765 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000766 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000767 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000768
Thomas Wouters7e474022000-07-16 12:04:32 +0000769 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000770 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000771 if (memchr(op->ob_sval, '\'', op->ob_size) &&
772 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 quote = '"';
774
775 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000776 for (i = 0; i < op->ob_size; i++) {
777 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000778 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000780 else if (c == '\t')
781 fprintf(fp, "\\t");
782 else if (c == '\n')
783 fprintf(fp, "\\n");
784 else if (c == '\r')
785 fprintf(fp, "\\r");
786 else if (c < ' ' || c >= 0x7f)
787 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000789 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000792 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793}
794
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000795PyObject *
796PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000798 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000799 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
800 PyObject *v;
801 if (newsize > INT_MAX) {
802 PyErr_SetString(PyExc_OverflowError,
803 "string is too large to make repr");
804 }
805 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000806 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000807 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 }
809 else {
810 register int i;
811 register char c;
812 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000813 int quote;
814
Thomas Wouters7e474022000-07-16 12:04:32 +0000815 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000817 if (smartquotes &&
818 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000819 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 quote = '"';
821
Tim Peters9161c8b2001-12-03 01:55:38 +0000822 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000824 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000825 /* There's at least enough room for a hex escape
826 and a closing quote. */
827 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000831 else if (c == '\t')
832 *p++ = '\\', *p++ = 't';
833 else if (c == '\n')
834 *p++ = '\\', *p++ = 'n';
835 else if (c == '\r')
836 *p++ = '\\', *p++ = 'r';
837 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000838 /* For performance, we don't want to call
839 PyOS_snprintf here (extra layers of
840 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000841 sprintf(p, "\\x%02x", c & 0xff);
842 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843 }
844 else
845 *p++ = c;
846 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000847 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000848 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000850 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000851 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000852 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854}
855
Guido van Rossum189f1df2001-05-01 16:51:53 +0000856static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000857string_repr(PyObject *op)
858{
859 return PyString_Repr(op, 1);
860}
861
862static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863string_str(PyObject *s)
864{
Tim Petersc9933152001-10-16 20:18:24 +0000865 assert(PyString_Check(s));
866 if (PyString_CheckExact(s)) {
867 Py_INCREF(s);
868 return s;
869 }
870 else {
871 /* Subtype -- return genuine string with the same value. */
872 PyStringObject *t = (PyStringObject *) s;
873 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
874 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000875}
876
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877static int
Fred Drakeba096332000-07-09 07:04:36 +0000878string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879{
880 return a->ob_size;
881}
882
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000883static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000884string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000885{
886 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000887 register PyStringObject *op;
888 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000889#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000890 if (PyUnicode_Check(bb))
891 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000892#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000893 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000894 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000895 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 return NULL;
897 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000898#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000900 if ((a->ob_size == 0 || b->ob_size == 0) &&
901 PyString_CheckExact(a) && PyString_CheckExact(b)) {
902 if (a->ob_size == 0) {
903 Py_INCREF(bb);
904 return bb;
905 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000906 Py_INCREF(a);
907 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000908 }
909 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000910 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000911 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000912 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000915 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000917 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
919 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
920 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922#undef b
923}
924
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000926string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927{
928 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000929 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000931 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000932 if (n < 0)
933 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000934 /* watch out for overflows: the size can overflow int,
935 * and the # of bytes needed can overflow size_t
936 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000938 if (n && size / n != a->ob_size) {
939 PyErr_SetString(PyExc_OverflowError,
940 "repeated string is too long");
941 return NULL;
942 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000943 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944 Py_INCREF(a);
945 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 }
Tim Peters8f422462000-09-09 06:13:41 +0000947 nbytes = size * sizeof(char);
948 if (nbytes / sizeof(char) != (size_t)size ||
949 nbytes + sizeof(PyStringObject) <= nbytes) {
950 PyErr_SetString(PyExc_OverflowError,
951 "repeated string is too long");
952 return NULL;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000955 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000961 for (i = 0; i < size; i += a->ob_size)
962 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
963 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965}
966
967/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
968
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000970string_slice(register PyStringObject *a, register int i, register int j)
971 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000972{
973 if (i < 0)
974 i = 0;
975 if (j < 0)
976 j = 0; /* Avoid signed/unsigned bug in next line */
977 if (j > a->ob_size)
978 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000979 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
980 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981 Py_INCREF(a);
982 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000983 }
984 if (j < i)
985 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000986 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987}
988
Guido van Rossum9284a572000-03-07 15:53:43 +0000989static int
Fred Drakeba096332000-07-09 07:04:36 +0000990string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000991{
Barry Warsaw817918c2002-08-06 16:58:21 +0000992 const char *lhs, *rhs, *end;
993 int size;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000994#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000995 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000996 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000997#endif
Barry Warsaw817918c2002-08-06 16:58:21 +0000998 if (!PyString_Check(el)) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000999 PyErr_SetString(PyExc_TypeError,
Barry Warsaw817918c2002-08-06 16:58:21 +00001000 "'in <string>' requires string as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +00001001 return -1;
1002 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001003 size = PyString_Size(el);
1004 rhs = PyString_AS_STRING(el);
1005 lhs = PyString_AS_STRING(a);
1006
1007 /* optimize for a single character */
1008 if (size == 1)
1009 return memchr(lhs, *rhs, PyString_Size(a)) != NULL;
1010
1011 end = lhs + (PyString_Size(a) - size);
1012 while (lhs <= end) {
1013 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001014 return 1;
1015 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001016
Guido van Rossum9284a572000-03-07 15:53:43 +00001017 return 0;
1018}
1019
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001021string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001022{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001024 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001025 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001026 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001027 return NULL;
1028 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001029 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001030 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001031 if (v == NULL)
1032 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001033 else {
1034#ifdef COUNT_ALLOCS
1035 one_strings++;
1036#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001037 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001038 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001039 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040}
1041
Martin v. Löwiscd353062001-05-24 16:56:35 +00001042static PyObject*
1043string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001044{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001045 int c;
1046 int len_a, len_b;
1047 int min_len;
1048 PyObject *result;
1049
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001050 /* Make sure both arguments are strings. */
1051 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001052 result = Py_NotImplemented;
1053 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001054 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001055 if (a == b) {
1056 switch (op) {
1057 case Py_EQ:case Py_LE:case Py_GE:
1058 result = Py_True;
1059 goto out;
1060 case Py_NE:case Py_LT:case Py_GT:
1061 result = Py_False;
1062 goto out;
1063 }
1064 }
1065 if (op == Py_EQ) {
1066 /* Supporting Py_NE here as well does not save
1067 much time, since Py_NE is rarely used. */
1068 if (a->ob_size == b->ob_size
1069 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001070 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001071 a->ob_size) == 0)) {
1072 result = Py_True;
1073 } else {
1074 result = Py_False;
1075 }
1076 goto out;
1077 }
1078 len_a = a->ob_size; len_b = b->ob_size;
1079 min_len = (len_a < len_b) ? len_a : len_b;
1080 if (min_len > 0) {
1081 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1082 if (c==0)
1083 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1084 }else
1085 c = 0;
1086 if (c == 0)
1087 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1088 switch (op) {
1089 case Py_LT: c = c < 0; break;
1090 case Py_LE: c = c <= 0; break;
1091 case Py_EQ: assert(0); break; /* unreachable */
1092 case Py_NE: c = c != 0; break;
1093 case Py_GT: c = c > 0; break;
1094 case Py_GE: c = c >= 0; break;
1095 default:
1096 result = Py_NotImplemented;
1097 goto out;
1098 }
1099 result = c ? Py_True : Py_False;
1100 out:
1101 Py_INCREF(result);
1102 return result;
1103}
1104
1105int
1106_PyString_Eq(PyObject *o1, PyObject *o2)
1107{
1108 PyStringObject *a, *b;
1109 a = (PyStringObject*)o1;
1110 b = (PyStringObject*)o2;
1111 return a->ob_size == b->ob_size
1112 && *a->ob_sval == *b->ob_sval
1113 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001114}
1115
Guido van Rossum9bfef441993-03-29 10:43:31 +00001116static long
Fred Drakeba096332000-07-09 07:04:36 +00001117string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001118{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001119 register int len;
1120 register unsigned char *p;
1121 register long x;
1122
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001123 if (a->ob_shash != -1)
1124 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001125 len = a->ob_size;
1126 p = (unsigned char *) a->ob_sval;
1127 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001128 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001129 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001130 x ^= a->ob_size;
1131 if (x == -1)
1132 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001133 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001134 return x;
1135}
1136
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001137static PyObject*
1138string_subscript(PyStringObject* self, PyObject* item)
1139{
1140 if (PyInt_Check(item)) {
1141 long i = PyInt_AS_LONG(item);
1142 if (i < 0)
1143 i += PyString_GET_SIZE(self);
1144 return string_item(self,i);
1145 }
1146 else if (PyLong_Check(item)) {
1147 long i = PyLong_AsLong(item);
1148 if (i == -1 && PyErr_Occurred())
1149 return NULL;
1150 if (i < 0)
1151 i += PyString_GET_SIZE(self);
1152 return string_item(self,i);
1153 }
1154 else if (PySlice_Check(item)) {
1155 int start, stop, step, slicelength, cur, i;
1156 char* source_buf;
1157 char* result_buf;
1158 PyObject* result;
1159
1160 if (PySlice_GetIndicesEx((PySliceObject*)item,
1161 PyString_GET_SIZE(self),
1162 &start, &stop, &step, &slicelength) < 0) {
1163 return NULL;
1164 }
1165
1166 if (slicelength <= 0) {
1167 return PyString_FromStringAndSize("", 0);
1168 }
1169 else {
1170 source_buf = PyString_AsString((PyObject*)self);
1171 result_buf = PyMem_Malloc(slicelength);
1172
1173 for (cur = start, i = 0; i < slicelength;
1174 cur += step, i++) {
1175 result_buf[i] = source_buf[cur];
1176 }
1177
1178 result = PyString_FromStringAndSize(result_buf,
1179 slicelength);
1180 PyMem_Free(result_buf);
1181 return result;
1182 }
1183 }
1184 else {
1185 PyErr_SetString(PyExc_TypeError,
1186 "string indices must be integers");
1187 return NULL;
1188 }
1189}
1190
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001191static int
Fred Drakeba096332000-07-09 07:04:36 +00001192string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001193{
1194 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001195 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001196 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001197 return -1;
1198 }
1199 *ptr = (void *)self->ob_sval;
1200 return self->ob_size;
1201}
1202
1203static int
Fred Drakeba096332000-07-09 07:04:36 +00001204string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001205{
Guido van Rossum045e6881997-09-08 18:30:11 +00001206 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001207 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001208 return -1;
1209}
1210
1211static int
Fred Drakeba096332000-07-09 07:04:36 +00001212string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001213{
1214 if ( lenp )
1215 *lenp = self->ob_size;
1216 return 1;
1217}
1218
Guido van Rossum1db70701998-10-08 02:18:52 +00001219static int
Fred Drakeba096332000-07-09 07:04:36 +00001220string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001221{
1222 if ( index != 0 ) {
1223 PyErr_SetString(PyExc_SystemError,
1224 "accessing non-existent string segment");
1225 return -1;
1226 }
1227 *ptr = self->ob_sval;
1228 return self->ob_size;
1229}
1230
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001231static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001232 (inquiry)string_length, /*sq_length*/
1233 (binaryfunc)string_concat, /*sq_concat*/
1234 (intargfunc)string_repeat, /*sq_repeat*/
1235 (intargfunc)string_item, /*sq_item*/
1236 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001237 0, /*sq_ass_item*/
1238 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001239 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001240};
1241
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001242static PyMappingMethods string_as_mapping = {
1243 (inquiry)string_length,
1244 (binaryfunc)string_subscript,
1245 0,
1246};
1247
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001248static PyBufferProcs string_as_buffer = {
1249 (getreadbufferproc)string_buffer_getreadbuf,
1250 (getwritebufferproc)string_buffer_getwritebuf,
1251 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001252 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001253};
1254
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001255
1256
/* Which end(s) of a string a strip operation works on.  The values
   double as indices into stripformat[] below, so keep them in step. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* Skip the three-character "|O:" prefix, leaving the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001265
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001266
1267static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001268split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001269{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001270 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001271 PyObject* item;
1272 PyObject *list = PyList_New(0);
1273
1274 if (list == NULL)
1275 return NULL;
1276
Guido van Rossum4c08d552000-03-10 22:55:18 +00001277 for (i = j = 0; i < len; ) {
1278 while (i < len && isspace(Py_CHARMASK(s[i])))
1279 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001281 while (i < len && !isspace(Py_CHARMASK(s[i])))
1282 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001283 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001284 if (maxsplit-- <= 0)
1285 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1287 if (item == NULL)
1288 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289 err = PyList_Append(list, item);
1290 Py_DECREF(item);
1291 if (err < 0)
1292 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001293 while (i < len && isspace(Py_CHARMASK(s[i])))
1294 i++;
1295 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001296 }
1297 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 if (j < len) {
1299 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1300 if (item == NULL)
1301 goto finally;
1302 err = PyList_Append(list, item);
1303 Py_DECREF(item);
1304 if (err < 0)
1305 goto finally;
1306 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001307 return list;
1308 finally:
1309 Py_DECREF(list);
1310 return NULL;
1311}
1312
1313
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001314PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315"S.split([sep [,maxsplit]]) -> list of strings\n\
1316\n\
1317Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001318delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001319splits are done. If sep is not specified or is None, any\n\
1320whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321
1322static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001323string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324{
1325 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001326 int maxsplit = -1;
1327 const char *s = PyString_AS_STRING(self), *sub;
1328 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001329
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001331 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 if (maxsplit < 0)
1333 maxsplit = INT_MAX;
1334 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 if (PyString_Check(subobj)) {
1337 sub = PyString_AS_STRING(subobj);
1338 n = PyString_GET_SIZE(subobj);
1339 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001340#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341 else if (PyUnicode_Check(subobj))
1342 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001343#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1345 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346 if (n == 0) {
1347 PyErr_SetString(PyExc_ValueError, "empty separator");
1348 return NULL;
1349 }
1350
1351 list = PyList_New(0);
1352 if (list == NULL)
1353 return NULL;
1354
1355 i = j = 0;
1356 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001357 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 if (maxsplit-- <= 0)
1359 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1361 if (item == NULL)
1362 goto fail;
1363 err = PyList_Append(list, item);
1364 Py_DECREF(item);
1365 if (err < 0)
1366 goto fail;
1367 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368 }
1369 else
1370 i++;
1371 }
1372 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1373 if (item == NULL)
1374 goto fail;
1375 err = PyList_Append(list, item);
1376 Py_DECREF(item);
1377 if (err < 0)
1378 goto fail;
1379
1380 return list;
1381
1382 fail:
1383 Py_DECREF(list);
1384 return NULL;
1385}
1386
1387
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001388PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389"S.join(sequence) -> string\n\
1390\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001391Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001392sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393
1394static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001395string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396{
1397 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001398 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 char *p;
1401 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001402 size_t sz = 0;
1403 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001404 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405
Tim Peters19fe14e2001-01-19 03:03:47 +00001406 seq = PySequence_Fast(orig, "");
1407 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001408 if (PyErr_ExceptionMatches(PyExc_TypeError))
1409 PyErr_Format(PyExc_TypeError,
1410 "sequence expected, %.80s found",
1411 orig->ob_type->tp_name);
1412 return NULL;
1413 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001414
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001415 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001416 if (seqlen == 0) {
1417 Py_DECREF(seq);
1418 return PyString_FromString("");
1419 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001421 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001422 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1423 PyErr_Format(PyExc_TypeError,
1424 "sequence item 0: expected string,"
1425 " %.80s found",
1426 item->ob_type->tp_name);
1427 Py_DECREF(seq);
1428 return NULL;
1429 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001430 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001431 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001432 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001433 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001434
Tim Peters19fe14e2001-01-19 03:03:47 +00001435 /* There are at least two things to join. Do a pre-pass to figure out
1436 * the total amount of space we'll need (sz), see whether any argument
1437 * is absurd, and defer to the Unicode join if appropriate.
1438 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001439 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001440 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001441 item = PySequence_Fast_GET_ITEM(seq, i);
1442 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001443#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001444 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001445 /* Defer to Unicode join.
1446 * CAUTION: There's no gurantee that the
1447 * original sequence can be iterated over
1448 * again, so we must pass seq here.
1449 */
1450 PyObject *result;
1451 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001452 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001453 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001454 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001455#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001456 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001457 "sequence item %i: expected string,"
1458 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001459 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001460 Py_DECREF(seq);
1461 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001463 sz += PyString_GET_SIZE(item);
1464 if (i != 0)
1465 sz += seplen;
1466 if (sz < old_sz || sz > INT_MAX) {
1467 PyErr_SetString(PyExc_OverflowError,
1468 "join() is too long for a Python string");
1469 Py_DECREF(seq);
1470 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001472 }
1473
1474 /* Allocate result space. */
1475 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1476 if (res == NULL) {
1477 Py_DECREF(seq);
1478 return NULL;
1479 }
1480
1481 /* Catenate everything. */
1482 p = PyString_AS_STRING(res);
1483 for (i = 0; i < seqlen; ++i) {
1484 size_t n;
1485 item = PySequence_Fast_GET_ITEM(seq, i);
1486 n = PyString_GET_SIZE(item);
1487 memcpy(p, PyString_AS_STRING(item), n);
1488 p += n;
1489 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001490 memcpy(p, sep, seplen);
1491 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001492 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001494
Jeremy Hylton49048292000-07-11 03:28:17 +00001495 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497}
1498
Tim Peters52e155e2001-06-16 05:42:57 +00001499PyObject *
1500_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001501{
Tim Petersa7259592001-06-16 05:11:17 +00001502 assert(sep != NULL && PyString_Check(sep));
1503 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001504 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001505}
1506
/* Normalize a start/end pair in place against a string of length
   `len`.  A negative value has `len` added to it and is then floored
   at 0; *end is additionally capped at `len`.  *start is NOT capped at
   `len`, so it can come out larger than *end. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int bound;

	/* End: cap at len, or count back from the end if negative. */
	bound = *end;
	if (bound > len)
		bound = len;
	else if (bound < 0)
		bound += len;
	if (bound < 0)
		bound = 0;
	*end = bound;

	/* Start: count back from the end if negative; no upper cap. */
	bound = *start;
	if (bound < 0)
		bound += len;
	if (bound < 0)
		bound = 0;
	*start = bound;
}
1521
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522static long
Fred Drakeba096332000-07-09 07:04:36 +00001523string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001525 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526 int len = PyString_GET_SIZE(self);
1527 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001528 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001529
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001530 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001531 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001532 return -2;
1533 if (PyString_Check(subobj)) {
1534 sub = PyString_AS_STRING(subobj);
1535 n = PyString_GET_SIZE(subobj);
1536 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001537#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001538 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001539 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001540#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001541 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542 return -2;
1543
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001544 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545
Guido van Rossum4c08d552000-03-10 22:55:18 +00001546 if (dir > 0) {
1547 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 last -= n;
1550 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001551 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 return (long)i;
1553 }
1554 else {
1555 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001556
Guido van Rossum4c08d552000-03-10 22:55:18 +00001557 if (n == 0 && i <= last)
1558 return (long)last;
1559 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001560 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001561 return (long)j;
1562 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001563
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564 return -1;
1565}
1566
1567
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001568PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569"S.find(sub [,start [,end]]) -> int\n\
1570\n\
1571Return the lowest index in S where substring sub is found,\n\
1572such that sub is contained within s[start,end]. Optional\n\
1573arguments start and end are interpreted as in slice notation.\n\
1574\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001575Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576
1577static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001578string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001580 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001581 if (result == -2)
1582 return NULL;
1583 return PyInt_FromLong(result);
1584}
1585
1586
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001587PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588"S.index(sub [,start [,end]]) -> int\n\
1589\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001590Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591
1592static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001593string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001595 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596 if (result == -2)
1597 return NULL;
1598 if (result == -1) {
1599 PyErr_SetString(PyExc_ValueError,
1600 "substring not found in string.index");
1601 return NULL;
1602 }
1603 return PyInt_FromLong(result);
1604}
1605
1606
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001607PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608"S.rfind(sub [,start [,end]]) -> int\n\
1609\n\
1610Return the highest index in S where substring sub is found,\n\
1611such that sub is contained within s[start,end]. Optional\n\
1612arguments start and end are interpreted as in slice notation.\n\
1613\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001614Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615
1616static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001617string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001619 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620 if (result == -2)
1621 return NULL;
1622 return PyInt_FromLong(result);
1623}
1624
1625
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001626PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627"S.rindex(sub [,start [,end]]) -> int\n\
1628\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001629Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630
1631static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001632string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001634 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 if (result == -2)
1636 return NULL;
1637 if (result == -1) {
1638 PyErr_SetString(PyExc_ValueError,
1639 "substring not found in string.rindex");
1640 return NULL;
1641 }
1642 return PyInt_FromLong(result);
1643}
1644
1645
1646static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001647do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1648{
1649 char *s = PyString_AS_STRING(self);
1650 int len = PyString_GET_SIZE(self);
1651 char *sep = PyString_AS_STRING(sepobj);
1652 int seplen = PyString_GET_SIZE(sepobj);
1653 int i, j;
1654
1655 i = 0;
1656 if (striptype != RIGHTSTRIP) {
1657 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1658 i++;
1659 }
1660 }
1661
1662 j = len;
1663 if (striptype != LEFTSTRIP) {
1664 do {
1665 j--;
1666 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1667 j++;
1668 }
1669
1670 if (i == 0 && j == len && PyString_CheckExact(self)) {
1671 Py_INCREF(self);
1672 return (PyObject*)self;
1673 }
1674 else
1675 return PyString_FromStringAndSize(s+i, j-i);
1676}
1677
1678
1679static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001680do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001681{
1682 char *s = PyString_AS_STRING(self);
1683 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001685 i = 0;
1686 if (striptype != RIGHTSTRIP) {
1687 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1688 i++;
1689 }
1690 }
1691
1692 j = len;
1693 if (striptype != LEFTSTRIP) {
1694 do {
1695 j--;
1696 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1697 j++;
1698 }
1699
Tim Peters8fa5dd02001-09-12 02:18:30 +00001700 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001701 Py_INCREF(self);
1702 return (PyObject*)self;
1703 }
1704 else
1705 return PyString_FromStringAndSize(s+i, j-i);
1706}
1707
1708
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001709static PyObject *
1710do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1711{
1712 PyObject *sep = NULL;
1713
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001714 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001715 return NULL;
1716
1717 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001718 if (PyString_Check(sep))
1719 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001720#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001721 else if (PyUnicode_Check(sep)) {
1722 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1723 PyObject *res;
1724 if (uniself==NULL)
1725 return NULL;
1726 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1727 striptype, sep);
1728 Py_DECREF(uniself);
1729 return res;
1730 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001731#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001732 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001733 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001734#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001735 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001736#else
1737 "%s arg must be None or str",
1738#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001739 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001740 return NULL;
1741 }
1742 return do_xstrip(self, striptype, sep);
1743 }
1744
1745 return do_strip(self, striptype);
1746}
1747
1748
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001749PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001750"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751\n\
1752Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001753whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001754If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001755If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756
1757static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001758string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001760 if (PyTuple_GET_SIZE(args) == 0)
1761 return do_strip(self, BOTHSTRIP); /* Common case */
1762 else
1763 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764}
1765
1766
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001767PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001768"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001770Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001771If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001772If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773
1774static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001775string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001777 if (PyTuple_GET_SIZE(args) == 0)
1778 return do_strip(self, LEFTSTRIP); /* Common case */
1779 else
1780 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781}
1782
1783
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001784PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001785"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001787Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001788If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001789If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790
1791static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001792string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001794 if (PyTuple_GET_SIZE(args) == 0)
1795 return do_strip(self, RIGHTSTRIP); /* Common case */
1796 else
1797 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798}
1799
1800
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001801PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802"S.lower() -> string\n\
1803\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001804Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805
1806static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001807string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808{
1809 char *s = PyString_AS_STRING(self), *s_new;
1810 int i, n = PyString_GET_SIZE(self);
1811 PyObject *new;
1812
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813 new = PyString_FromStringAndSize(NULL, n);
1814 if (new == NULL)
1815 return NULL;
1816 s_new = PyString_AsString(new);
1817 for (i = 0; i < n; i++) {
1818 int c = Py_CHARMASK(*s++);
1819 if (isupper(c)) {
1820 *s_new = tolower(c);
1821 } else
1822 *s_new = c;
1823 s_new++;
1824 }
1825 return new;
1826}
1827
1828
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001829PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830"S.upper() -> string\n\
1831\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001832Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001833
1834static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001835string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836{
1837 char *s = PyString_AS_STRING(self), *s_new;
1838 int i, n = PyString_GET_SIZE(self);
1839 PyObject *new;
1840
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841 new = PyString_FromStringAndSize(NULL, n);
1842 if (new == NULL)
1843 return NULL;
1844 s_new = PyString_AsString(new);
1845 for (i = 0; i < n; i++) {
1846 int c = Py_CHARMASK(*s++);
1847 if (islower(c)) {
1848 *s_new = toupper(c);
1849 } else
1850 *s_new = c;
1851 s_new++;
1852 }
1853 return new;
1854}
1855
1856
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001857PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858"S.title() -> string\n\
1859\n\
1860Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001861characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001862
1863static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001864string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865{
1866 char *s = PyString_AS_STRING(self), *s_new;
1867 int i, n = PyString_GET_SIZE(self);
1868 int previous_is_cased = 0;
1869 PyObject *new;
1870
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 new = PyString_FromStringAndSize(NULL, n);
1872 if (new == NULL)
1873 return NULL;
1874 s_new = PyString_AsString(new);
1875 for (i = 0; i < n; i++) {
1876 int c = Py_CHARMASK(*s++);
1877 if (islower(c)) {
1878 if (!previous_is_cased)
1879 c = toupper(c);
1880 previous_is_cased = 1;
1881 } else if (isupper(c)) {
1882 if (previous_is_cased)
1883 c = tolower(c);
1884 previous_is_cased = 1;
1885 } else
1886 previous_is_cased = 0;
1887 *s_new++ = c;
1888 }
1889 return new;
1890}
1891
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001892PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893"S.capitalize() -> string\n\
1894\n\
1895Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001896capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897
1898static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001899string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900{
1901 char *s = PyString_AS_STRING(self), *s_new;
1902 int i, n = PyString_GET_SIZE(self);
1903 PyObject *new;
1904
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905 new = PyString_FromStringAndSize(NULL, n);
1906 if (new == NULL)
1907 return NULL;
1908 s_new = PyString_AsString(new);
1909 if (0 < n) {
1910 int c = Py_CHARMASK(*s++);
1911 if (islower(c))
1912 *s_new = toupper(c);
1913 else
1914 *s_new = c;
1915 s_new++;
1916 }
1917 for (i = 1; i < n; i++) {
1918 int c = Py_CHARMASK(*s++);
1919 if (isupper(c))
1920 *s_new = tolower(c);
1921 else
1922 *s_new = c;
1923 s_new++;
1924 }
1925 return new;
1926}
1927
1928
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001929PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930"S.count(sub[, start[, end]]) -> int\n\
1931\n\
1932Return the number of occurrences of substring sub in string\n\
1933S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001934interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935
1936static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001937string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001939 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940 int len = PyString_GET_SIZE(self), n;
1941 int i = 0, last = INT_MAX;
1942 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944
Guido van Rossumc6821402000-05-08 14:08:05 +00001945 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1946 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001948
Guido van Rossum4c08d552000-03-10 22:55:18 +00001949 if (PyString_Check(subobj)) {
1950 sub = PyString_AS_STRING(subobj);
1951 n = PyString_GET_SIZE(subobj);
1952 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001953#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001954 else if (PyUnicode_Check(subobj)) {
1955 int count;
1956 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1957 if (count == -1)
1958 return NULL;
1959 else
1960 return PyInt_FromLong((long) count);
1961 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001962#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001963 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1964 return NULL;
1965
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001966 string_adjust_indices(&i, &last, len);
1967
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968 m = last + 1 - n;
1969 if (n == 0)
1970 return PyInt_FromLong((long) (m-i));
1971
1972 r = 0;
1973 while (i < m) {
1974 if (!memcmp(s+i, sub, n)) {
1975 r++;
1976 i += n;
1977 } else {
1978 i++;
1979 }
1980 }
1981 return PyInt_FromLong((long) r);
1982}
1983
1984
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001985PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986"S.swapcase() -> string\n\
1987\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001988Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001989converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990
1991static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001992string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993{
1994 char *s = PyString_AS_STRING(self), *s_new;
1995 int i, n = PyString_GET_SIZE(self);
1996 PyObject *new;
1997
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998 new = PyString_FromStringAndSize(NULL, n);
1999 if (new == NULL)
2000 return NULL;
2001 s_new = PyString_AsString(new);
2002 for (i = 0; i < n; i++) {
2003 int c = Py_CHARMASK(*s++);
2004 if (islower(c)) {
2005 *s_new = toupper(c);
2006 }
2007 else if (isupper(c)) {
2008 *s_new = tolower(c);
2009 }
2010 else
2011 *s_new = c;
2012 s_new++;
2013 }
2014 return new;
2015}
2016
2017
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002018PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019"S.translate(table [,deletechars]) -> string\n\
2020\n\
2021Return a copy of the string S, where all characters occurring\n\
2022in the optional argument deletechars are removed, and the\n\
2023remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002024translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025
2026static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002027string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002029 register char *input, *output;
2030 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031 register int i, c, changed = 0;
2032 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002033 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034 int inlen, tablen, dellen = 0;
2035 PyObject *result;
2036 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002037 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038
Guido van Rossum4c08d552000-03-10 22:55:18 +00002039 if (!PyArg_ParseTuple(args, "O|O:translate",
2040 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002042
2043 if (PyString_Check(tableobj)) {
2044 table1 = PyString_AS_STRING(tableobj);
2045 tablen = PyString_GET_SIZE(tableobj);
2046 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002047#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002048 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002049 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002050 parameter; instead a mapping to None will cause characters
2051 to be deleted. */
2052 if (delobj != NULL) {
2053 PyErr_SetString(PyExc_TypeError,
2054 "deletions are implemented differently for unicode");
2055 return NULL;
2056 }
2057 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2058 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002059#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002062
2063 if (delobj != NULL) {
2064 if (PyString_Check(delobj)) {
2065 del_table = PyString_AS_STRING(delobj);
2066 dellen = PyString_GET_SIZE(delobj);
2067 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002068#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069 else if (PyUnicode_Check(delobj)) {
2070 PyErr_SetString(PyExc_TypeError,
2071 "deletions are implemented differently for unicode");
2072 return NULL;
2073 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002074#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002075 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2076 return NULL;
2077
2078 if (tablen != 256) {
2079 PyErr_SetString(PyExc_ValueError,
2080 "translation table must be 256 characters long");
2081 return NULL;
2082 }
2083 }
2084 else {
2085 del_table = NULL;
2086 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087 }
2088
2089 table = table1;
2090 inlen = PyString_Size(input_obj);
2091 result = PyString_FromStringAndSize((char *)NULL, inlen);
2092 if (result == NULL)
2093 return NULL;
2094 output_start = output = PyString_AsString(result);
2095 input = PyString_AsString(input_obj);
2096
2097 if (dellen == 0) {
2098 /* If no deletions are required, use faster code */
2099 for (i = inlen; --i >= 0; ) {
2100 c = Py_CHARMASK(*input++);
2101 if (Py_CHARMASK((*output++ = table[c])) != c)
2102 changed = 1;
2103 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002104 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002105 return result;
2106 Py_DECREF(result);
2107 Py_INCREF(input_obj);
2108 return input_obj;
2109 }
2110
2111 for (i = 0; i < 256; i++)
2112 trans_table[i] = Py_CHARMASK(table[i]);
2113
2114 for (i = 0; i < dellen; i++)
2115 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2116
2117 for (i = inlen; --i >= 0; ) {
2118 c = Py_CHARMASK(*input++);
2119 if (trans_table[c] != -1)
2120 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2121 continue;
2122 changed = 1;
2123 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002124 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125 Py_DECREF(result);
2126 Py_INCREF(input_obj);
2127 return input_obj;
2128 }
2129 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002130 if (inlen > 0)
2131 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132 return result;
2133}
2134
2135
2136/* What follows is used for implementing replace(). Perry Stoll. */
2137
/*
  mymemfind

  A strstr() work-alike for arbitrary, length-delimited memory blocks.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its index within MEM.  Returns -1 when there
  is no occurrence, which includes the case where PAT is longer than
  MEM.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Last index at which a full-length match could still begin. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
2163
/*
   mymemcnt

   Return the number of non-overlapping occurrences of PAT (PAT_LEN
   bytes) in MEM (LEN bytes), scanning left to right:
     mem=1111  and pat=11 returns 2;
     mem=11111 and pat=11 also returns 2.

   An empty pattern (PAT_LEN <= 0) yields 0.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    register int offset = 0;
    int nfound = 0;

    /* A match of length zero would not advance the scan below, so the
       loop would never terminate; treat an empty pattern as absent. */
    if (pat_len <= 0)
        return 0;

    while (len >= 0) {
        offset = mymemfind(mem, len, pat, pat_len);
        if (offset == -1)
            break;
        /* Resume just past this match so occurrences never overlap. */
        mem += offset + pat_len;
        len -= offset + pat_len;
        nfound++;
    }
    return nfound;
}
2187
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "all of them".

   If STR is empty, PAT is longer than STR, or nothing is to be
   replaced, the original string is returned.  Otherwise, a new string
   is allocated here and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally (release with PyMem_FREE), or
       STR itself when out_len is -1, or
       NULL if an error occurred (no memory, or the result length
       would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Each replacement changes the length by sub_len - pat_len.  When
       that grows the string, make sure the total still fits in an int
       before computing it: signed overflow is undefined and would lead
       to an undersized buffer below. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2272
2273
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002274PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275"S.replace (old, new[, maxsplit]) -> string\n\
2276\n\
2277Return a copy of string S with all occurrences of substring\n\
2278old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002279given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002280
2281static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002282string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002283{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284 const char *str = PyString_AS_STRING(self), *sub, *repl;
2285 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002286 const int len = PyString_GET_SIZE(self);
2287 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002288 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002289 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002290 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291
Guido van Rossum4c08d552000-03-10 22:55:18 +00002292 if (!PyArg_ParseTuple(args, "OO|i:replace",
2293 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295
2296 if (PyString_Check(subobj)) {
2297 sub = PyString_AS_STRING(subobj);
2298 sub_len = PyString_GET_SIZE(subobj);
2299 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002300#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002302 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002304#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2306 return NULL;
2307
2308 if (PyString_Check(replobj)) {
2309 repl = PyString_AS_STRING(replobj);
2310 repl_len = PyString_GET_SIZE(replobj);
2311 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002312#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002313 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002314 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002315 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002316#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2318 return NULL;
2319
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002320 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002321 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002322 return NULL;
2323 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325 if (new_s == NULL) {
2326 PyErr_NoMemory();
2327 return NULL;
2328 }
2329 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002330 if (PyString_CheckExact(self)) {
2331 /* we're returning another reference to self */
2332 new = (PyObject*)self;
2333 Py_INCREF(new);
2334 }
2335 else {
2336 new = PyString_FromStringAndSize(str, len);
2337 if (new == NULL)
2338 return NULL;
2339 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 }
2341 else {
2342 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002343 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344 }
2345 return new;
2346}
2347
2348
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002349PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002350"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002352Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002354comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355
2356static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002357string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002361 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362 int plen;
2363 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002364 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366
Guido van Rossumc6821402000-05-08 14:08:05 +00002367 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2368 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 return NULL;
2370 if (PyString_Check(subobj)) {
2371 prefix = PyString_AS_STRING(subobj);
2372 plen = PyString_GET_SIZE(subobj);
2373 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002374#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002375 else if (PyUnicode_Check(subobj)) {
2376 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002377 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002378 subobj, start, end, -1);
2379 if (rc == -1)
2380 return NULL;
2381 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002382 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002383 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002384#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002385 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386 return NULL;
2387
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002388 string_adjust_indices(&start, &end, len);
2389
2390 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002391 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002393 if (end-start >= plen)
2394 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2395 else
2396 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002397}
2398
2399
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002400PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002401"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002402\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002403Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002405comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406
2407static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002408string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002410 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002412 const char* suffix;
2413 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002415 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002416 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002417
Guido van Rossumc6821402000-05-08 14:08:05 +00002418 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2419 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002420 return NULL;
2421 if (PyString_Check(subobj)) {
2422 suffix = PyString_AS_STRING(subobj);
2423 slen = PyString_GET_SIZE(subobj);
2424 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002425#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002426 else if (PyUnicode_Check(subobj)) {
2427 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002428 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002429 subobj, start, end, +1);
2430 if (rc == -1)
2431 return NULL;
2432 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002433 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002434 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002435#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002437 return NULL;
2438
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002439 string_adjust_indices(&start, &end, len);
2440
2441 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002442 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002444 if (end-slen > start)
2445 start = end - slen;
2446 if (end-start >= slen)
2447 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2448 else
2449 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002450}
2451
2452
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002453PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002454"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002455\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002456Encodes S using the codec registered for encoding. encoding defaults\n\
2457to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002458handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002459a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002460
2461static PyObject *
2462string_encode(PyStringObject *self, PyObject *args)
2463{
2464 char *encoding = NULL;
2465 char *errors = NULL;
2466 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2467 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002468 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2469}
2470
2471
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002472PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002473"S.decode([encoding[,errors]]) -> object\n\
2474\n\
2475Decodes S using the codec registered for encoding. encoding defaults\n\
2476to the default encoding. errors may be given to set a different error\n\
2477handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002478a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002479
2480static PyObject *
2481string_decode(PyStringObject *self, PyObject *args)
2482{
2483 char *encoding = NULL;
2484 char *errors = NULL;
2485 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2486 return NULL;
2487 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002488}
2489
2490
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002491PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002492"S.expandtabs([tabsize]) -> string\n\
2493\n\
2494Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002495If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496
2497static PyObject*
2498string_expandtabs(PyStringObject *self, PyObject *args)
2499{
2500 const char *e, *p;
2501 char *q;
2502 int i, j;
2503 PyObject *u;
2504 int tabsize = 8;
2505
2506 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2507 return NULL;
2508
Thomas Wouters7e474022000-07-16 12:04:32 +00002509 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002510 i = j = 0;
2511 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2512 for (p = PyString_AS_STRING(self); p < e; p++)
2513 if (*p == '\t') {
2514 if (tabsize > 0)
2515 j += tabsize - (j % tabsize);
2516 }
2517 else {
2518 j++;
2519 if (*p == '\n' || *p == '\r') {
2520 i += j;
2521 j = 0;
2522 }
2523 }
2524
2525 /* Second pass: create output string and fill it */
2526 u = PyString_FromStringAndSize(NULL, i + j);
2527 if (!u)
2528 return NULL;
2529
2530 j = 0;
2531 q = PyString_AS_STRING(u);
2532
2533 for (p = PyString_AS_STRING(self); p < e; p++)
2534 if (*p == '\t') {
2535 if (tabsize > 0) {
2536 i = tabsize - (j % tabsize);
2537 j += i;
2538 while (i--)
2539 *q++ = ' ';
2540 }
2541 }
2542 else {
2543 j++;
2544 *q++ = *p;
2545 if (*p == '\n' || *p == '\r')
2546 j = 0;
2547 }
2548
2549 return u;
2550}
2551
Tim Peters8fa5dd02001-09-12 02:18:30 +00002552static PyObject *
2553pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002554{
2555 PyObject *u;
2556
2557 if (left < 0)
2558 left = 0;
2559 if (right < 0)
2560 right = 0;
2561
Tim Peters8fa5dd02001-09-12 02:18:30 +00002562 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002563 Py_INCREF(self);
2564 return (PyObject *)self;
2565 }
2566
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002567 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002568 left + PyString_GET_SIZE(self) + right);
2569 if (u) {
2570 if (left)
2571 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002572 memcpy(PyString_AS_STRING(u) + left,
2573 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002574 PyString_GET_SIZE(self));
2575 if (right)
2576 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2577 fill, right);
2578 }
2579
2580 return u;
2581}
2582
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002583PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002584"S.ljust(width) -> string\n"
2585"\n"
2586"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002587"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002588
2589static PyObject *
2590string_ljust(PyStringObject *self, PyObject *args)
2591{
2592 int width;
2593 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2594 return NULL;
2595
Tim Peters8fa5dd02001-09-12 02:18:30 +00002596 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002597 Py_INCREF(self);
2598 return (PyObject*) self;
2599 }
2600
2601 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2602}
2603
2604
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002605PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002606"S.rjust(width) -> string\n"
2607"\n"
2608"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002609"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002610
2611static PyObject *
2612string_rjust(PyStringObject *self, PyObject *args)
2613{
2614 int width;
2615 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2616 return NULL;
2617
Tim Peters8fa5dd02001-09-12 02:18:30 +00002618 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002619 Py_INCREF(self);
2620 return (PyObject*) self;
2621 }
2622
2623 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2624}
2625
2626
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002627PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002628"S.center(width) -> string\n"
2629"\n"
2630"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002631"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002632
2633static PyObject *
2634string_center(PyStringObject *self, PyObject *args)
2635{
2636 int marg, left;
2637 int width;
2638
2639 if (!PyArg_ParseTuple(args, "i:center", &width))
2640 return NULL;
2641
Tim Peters8fa5dd02001-09-12 02:18:30 +00002642 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002643 Py_INCREF(self);
2644 return (PyObject*) self;
2645 }
2646
2647 marg = width - PyString_GET_SIZE(self);
2648 left = marg / 2 + (marg & width & 1);
2649
2650 return pad(self, left, marg - left, ' ');
2651}
2652
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002653PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002654"S.zfill(width) -> string\n"
2655"\n"
2656"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002657"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002658
2659static PyObject *
2660string_zfill(PyStringObject *self, PyObject *args)
2661{
2662 int fill;
2663 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002664 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002665
2666 int width;
2667 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2668 return NULL;
2669
2670 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002671 if (PyString_CheckExact(self)) {
2672 Py_INCREF(self);
2673 return (PyObject*) self;
2674 }
2675 else
2676 return PyString_FromStringAndSize(
2677 PyString_AS_STRING(self),
2678 PyString_GET_SIZE(self)
2679 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002680 }
2681
2682 fill = width - PyString_GET_SIZE(self);
2683
2684 s = pad(self, fill, 0, '0');
2685
2686 if (s == NULL)
2687 return NULL;
2688
2689 p = PyString_AS_STRING(s);
2690 if (p[fill] == '+' || p[fill] == '-') {
2691 /* move sign to beginning of string */
2692 p[0] = p[fill];
2693 p[fill] = '0';
2694 }
2695
2696 return (PyObject*) s;
2697}
2698
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002699PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002700"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002701"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002702"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002703"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002704
2705static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002706string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002707{
Fred Drakeba096332000-07-09 07:04:36 +00002708 register const unsigned char *p
2709 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002710 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002711
Guido van Rossum4c08d552000-03-10 22:55:18 +00002712 /* Shortcut for single character strings */
2713 if (PyString_GET_SIZE(self) == 1 &&
2714 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002715 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002716
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002717 /* Special case for empty strings */
2718 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002719 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002720
Guido van Rossum4c08d552000-03-10 22:55:18 +00002721 e = p + PyString_GET_SIZE(self);
2722 for (; p < e; p++) {
2723 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002724 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002725 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002726 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002727}
2728
2729
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002730PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002731"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002732\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002733Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002734and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002735
2736static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002737string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002738{
Fred Drakeba096332000-07-09 07:04:36 +00002739 register const unsigned char *p
2740 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002741 register const unsigned char *e;
2742
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002743 /* Shortcut for single character strings */
2744 if (PyString_GET_SIZE(self) == 1 &&
2745 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002746 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002747
2748 /* Special case for empty strings */
2749 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002750 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002751
2752 e = p + PyString_GET_SIZE(self);
2753 for (; p < e; p++) {
2754 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002755 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002756 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002757 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002758}
2759
2760
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002761PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002762"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002763\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002764Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002765and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002766
2767static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002768string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002769{
Fred Drakeba096332000-07-09 07:04:36 +00002770 register const unsigned char *p
2771 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002772 register const unsigned char *e;
2773
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002774 /* Shortcut for single character strings */
2775 if (PyString_GET_SIZE(self) == 1 &&
2776 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002777 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002778
2779 /* Special case for empty strings */
2780 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002781 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002782
2783 e = p + PyString_GET_SIZE(self);
2784 for (; p < e; p++) {
2785 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002786 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002787 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002788 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002789}
2790
2791
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002792PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002793"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002794\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002795Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002796False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002797
2798static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002799string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002800{
Fred Drakeba096332000-07-09 07:04:36 +00002801 register const unsigned char *p
2802 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002803 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002804
Guido van Rossum4c08d552000-03-10 22:55:18 +00002805 /* Shortcut for single character strings */
2806 if (PyString_GET_SIZE(self) == 1 &&
2807 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002808 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002809
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002810 /* Special case for empty strings */
2811 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002812 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002813
Guido van Rossum4c08d552000-03-10 22:55:18 +00002814 e = p + PyString_GET_SIZE(self);
2815 for (; p < e; p++) {
2816 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002817 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002818 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002819 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002820}
2821
2822
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002823PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002824"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002825\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002826Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002827at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002828
2829static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002830string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002831{
Fred Drakeba096332000-07-09 07:04:36 +00002832 register const unsigned char *p
2833 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002834 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002835 int cased;
2836
Guido van Rossum4c08d552000-03-10 22:55:18 +00002837 /* Shortcut for single character strings */
2838 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002839 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002840
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002841 /* Special case for empty strings */
2842 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002843 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002844
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845 e = p + PyString_GET_SIZE(self);
2846 cased = 0;
2847 for (; p < e; p++) {
2848 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002849 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002850 else if (!cased && islower(*p))
2851 cased = 1;
2852 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002853 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002854}
2855
2856
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002857PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002858"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002859\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002860Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002861at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002862
2863static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002864string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002865{
Fred Drakeba096332000-07-09 07:04:36 +00002866 register const unsigned char *p
2867 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002868 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869 int cased;
2870
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871 /* Shortcut for single character strings */
2872 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002873 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002874
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002875 /* Special case for empty strings */
2876 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002877 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002878
Guido van Rossum4c08d552000-03-10 22:55:18 +00002879 e = p + PyString_GET_SIZE(self);
2880 cased = 0;
2881 for (; p < e; p++) {
2882 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002883 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002884 else if (!cased && isupper(*p))
2885 cased = 1;
2886 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002887 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002888}
2889
2890
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002891PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002892"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002894Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002895may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002896ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002897
2898static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002899string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900{
Fred Drakeba096332000-07-09 07:04:36 +00002901 register const unsigned char *p
2902 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002903 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002904 int cased, previous_is_cased;
2905
Guido van Rossum4c08d552000-03-10 22:55:18 +00002906 /* Shortcut for single character strings */
2907 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002908 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002909
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002910 /* Special case for empty strings */
2911 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002912 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002913
Guido van Rossum4c08d552000-03-10 22:55:18 +00002914 e = p + PyString_GET_SIZE(self);
2915 cased = 0;
2916 previous_is_cased = 0;
2917 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002918 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002919
2920 if (isupper(ch)) {
2921 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002922 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002923 previous_is_cased = 1;
2924 cased = 1;
2925 }
2926 else if (islower(ch)) {
2927 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002928 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929 previous_is_cased = 1;
2930 cased = 1;
2931 }
2932 else
2933 previous_is_cased = 0;
2934 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002935 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002936}
2937
2938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002939PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002940"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002941\n\
2942Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002943Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002944is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002945
2946#define SPLIT_APPEND(data, left, right) \
2947 str = PyString_FromStringAndSize(data + left, right - left); \
2948 if (!str) \
2949 goto onError; \
2950 if (PyList_Append(list, str)) { \
2951 Py_DECREF(str); \
2952 goto onError; \
2953 } \
2954 else \
2955 Py_DECREF(str);
2956
2957static PyObject*
2958string_splitlines(PyStringObject *self, PyObject *args)
2959{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002960 register int i;
2961 register int j;
2962 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002963 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002964 PyObject *list;
2965 PyObject *str;
2966 char *data;
2967
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002968 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002969 return NULL;
2970
2971 data = PyString_AS_STRING(self);
2972 len = PyString_GET_SIZE(self);
2973
Guido van Rossum4c08d552000-03-10 22:55:18 +00002974 list = PyList_New(0);
2975 if (!list)
2976 goto onError;
2977
2978 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002979 int eol;
2980
Guido van Rossum4c08d552000-03-10 22:55:18 +00002981 /* Find a line and append it */
2982 while (i < len && data[i] != '\n' && data[i] != '\r')
2983 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002984
2985 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002986 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987 if (i < len) {
2988 if (data[i] == '\r' && i + 1 < len &&
2989 data[i+1] == '\n')
2990 i += 2;
2991 else
2992 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002993 if (keepends)
2994 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002995 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002996 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002997 j = i;
2998 }
2999 if (j < len) {
3000 SPLIT_APPEND(data, j, len);
3001 }
3002
3003 return list;
3004
3005 onError:
3006 Py_DECREF(list);
3007 return NULL;
3008}
3009
3010#undef SPLIT_APPEND
3011
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003012
/* Method table for the str type (installed as tp_methods of PyString_Type).
   Each entry gives the Python-level method name, the C function that
   implements it, the calling-convention flag (METH_O, METH_NOARGS or
   METH_VARARGS) and the method's docstring.  The table ends with a
   NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
	/* Counterparts of the obsolete stropmodule functions; except
	   string.maketrans(). */
	{"join", (PyCFunction)string_join, METH_O, join__doc__},
	{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
	{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
	{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
	{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
	{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
	{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
	{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
	{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
	{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
	{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
	{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
	 capitalize__doc__},
	{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
	{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
	 endswith__doc__},
	{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
	{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
	{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
	{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
	{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
	{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
	{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
	{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
	 startswith__doc__},
	{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
	{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
	 swapcase__doc__},
	{"translate", (PyCFunction)string_translate, METH_VARARGS,
	 translate__doc__},
	{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
	{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
	{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
	{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
	{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
	{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
	{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
	{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
	 expandtabs__doc__},
	{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
	 splitlines__doc__},
	{NULL,     NULL}		     /* sentinel */
};
3060
Jeremy Hylton938ace62002-07-17 16:30:39 +00003061static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003062str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3063
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003064static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003065string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003066{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003067 PyObject *x = NULL;
3068 static char *kwlist[] = {"object", 0};
3069
Guido van Rossumae960af2001-08-30 03:11:59 +00003070 if (type != &PyString_Type)
3071 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003072 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3073 return NULL;
3074 if (x == NULL)
3075 return PyString_FromString("");
3076 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003077}
3078
Guido van Rossumae960af2001-08-30 03:11:59 +00003079static PyObject *
3080str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3081{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003082 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003083 int n;
3084
3085 assert(PyType_IsSubtype(type, &PyString_Type));
3086 tmp = string_new(&PyString_Type, args, kwds);
3087 if (tmp == NULL)
3088 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003089 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003090 n = PyString_GET_SIZE(tmp);
3091 pnew = type->tp_alloc(type, n);
3092 if (pnew != NULL) {
3093 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003094 ((PyStringObject *)pnew)->ob_shash =
3095 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003096 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003097 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003098 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003099 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003100}
3101
Guido van Rossumcacfc072002-05-24 19:01:59 +00003102static PyObject *
3103basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3104{
3105 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003106 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003107 return NULL;
3108}
3109
/* Docstring for the basestring type (used as tp_doc below). */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");

/* Type object for basestring.  It fills in no behaviour slots of its own:
   only the name, flags (it may be subclassed), docstring, base type and a
   tp_new (basestring_new) are set.  PyString_Type names this type as its
   tp_base. */
PyTypeObject PyBaseString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"basestring",				/* tp_name */
	0,					/* tp_basicsize */
	0,					/* tp_itemsize */
	0,			 		/* tp_dealloc */
	0,			 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	0,		 			/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,		 			/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	0,					/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
	basestring_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	0,					/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	0,					/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseObject_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	basestring_new,				/* tp_new */
	0,		                	/* tp_free */
};
3155
/* Docstring for the str type (used as tp_doc below). */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  Instances are variable-sized: the basic size is
   sizeof(PyStringObject) and each item is one char.  The type may be
   subclassed (Py_TPFLAGS_BASETYPE), derives from PyBaseString_Type,
   exposes string_methods as its methods and is constructed through
   string_new. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"str",					/* tp_name */
	sizeof(PyStringObject),			/* tp_basicsize */
	sizeof(char),				/* tp_itemsize */
 	(destructor)string_dealloc, 		/* tp_dealloc */
	(printfunc)string_print, 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	(reprfunc)string_repr, 			/* tp_repr */
	0,					/* tp_as_number */
	&string_as_sequence,			/* tp_as_sequence */
	&string_as_mapping,			/* tp_as_mapping */
	(hashfunc)string_hash, 			/* tp_hash */
	0,					/* tp_call */
	(reprfunc)string_str,			/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	&string_as_buffer,			/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
	string_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	(richcmpfunc)string_richcompare,	/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	string_methods,				/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseString_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	string_new,				/* tp_new */
	PyObject_Del,	                	/* tp_free */
};
3204
3205void
Fred Drakeba096332000-07-09 07:04:36 +00003206PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003207{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003208 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003209 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003210 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003211 if (w == NULL || !PyString_Check(*pv)) {
3212 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003213 *pv = NULL;
3214 return;
3215 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003216 v = string_concat((PyStringObject *) *pv, w);
3217 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003218 *pv = v;
3219}
3220
Guido van Rossum013142a1994-08-30 08:19:36 +00003221void
Fred Drakeba096332000-07-09 07:04:36 +00003222PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003223{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003224 PyString_Concat(pv, w);
3225 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003226}
3227
3228
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003229/* The following function breaks the notion that strings are immutable:
3230 it changes the size of a string. We get away with this only if there
3231 is only one module referencing the object. You can also think of it
3232 as creating a new string object and destroying the old one, only
3233 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003234 already be known to some other part of the code...
3235 Note that if there's not enough memory to resize the string, the original
3236 string object at *pv is deallocated, *pv is set to NULL, an "out of
3237 memory" exception is set, and -1 is returned. Else (on success) 0 is
3238 returned, and the value in *pv may or may not be the same as on input.
3239 As always, an extra byte is allocated for a trailing \0 byte (newsize
3240 does *not* include that), and a trailing \0 byte is stored.
3241*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003242
3243int
Fred Drakeba096332000-07-09 07:04:36 +00003244_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003245{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003246 register PyObject *v;
3247 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003248 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003249 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003250 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003251 Py_DECREF(v);
3252 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003253 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003254 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003255 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003256 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003257 _Py_ForgetReference(v);
3258 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003259 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003260 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003261 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003262 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003263 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003264 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003265 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003266 _Py_NewReference(*pv);
3267 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003268 sv->ob_size = newsize;
3269 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003270 return 0;
3271}
Guido van Rossume5372401993-03-16 12:15:04 +00003272
3273/* Helpers for formatstring */
3274
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003275static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003276getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003277{
3278 int argidx = *p_argidx;
3279 if (argidx < arglen) {
3280 (*p_argidx)++;
3281 if (arglen < 0)
3282 return args;
3283 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003284 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003285 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003286 PyErr_SetString(PyExc_TypeError,
3287 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003288 return NULL;
3289}
3290
/* Format codes
 * Bit flags, OR-ed together into a single `flags` int, recording which
 * flag characters were seen in a format specifier:
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
3303
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003304static int
Fred Drakeba096332000-07-09 07:04:36 +00003305formatfloat(char *buf, size_t buflen, int flags,
3306 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003307{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003308 /* fmt = '%#.' + `prec` + `type`
3309 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003310 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003311 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003312 x = PyFloat_AsDouble(v);
3313 if (x == -1.0 && PyErr_Occurred()) {
3314 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003315 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003316 }
Guido van Rossume5372401993-03-16 12:15:04 +00003317 if (prec < 0)
3318 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003319 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3320 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003321 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3322 (flags&F_ALT) ? "#" : "",
3323 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003324 /* worst case length calc to ensure no buffer overrun:
3325 fmt = %#.<prec>g
3326 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003327 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003328 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3329 If prec=0 the effective precision is 1 (the leading digit is
3330 always given), therefore increase by one to 10+prec. */
3331 if (buflen <= (size_t)10 + (size_t)prec) {
3332 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003333 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003334 return -1;
3335 }
Tim Peters885d4572001-11-28 20:27:42 +00003336 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003337 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003338}
3339
Tim Peters38fd5b62000-09-21 05:43:11 +00003340/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3341 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3342 * Python's regular ints.
3343 * Return value: a new PyString*, or NULL if error.
3344 * . *pbuf is set to point into it,
3345 * *plen set to the # of chars following that.
3346 * Caller must decref it when done using pbuf.
3347 * The string starting at *pbuf is of the form
3348 * "-"? ("0x" | "0X")? digit+
3349 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003350 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003351 * There will be at least prec digits, zero-filled on the left if
3352 * necessary to get that many.
3353 * val object to be converted
3354 * flags bitmask of format flags; only F_ALT is looked at
3355 * prec minimum number of digits; 0-fill on left if needed
3356 * type a character in [duoxX]; u acts the same as d
3357 *
3358 * CAUTION: o, x and X conversions on regular ints can never
3359 * produce a '-' sign, but can for Python's unbounded ints.
3360 */
3361PyObject*
3362_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3363 char **pbuf, int *plen)
3364{
3365 PyObject *result = NULL;
3366 char *buf;
3367 int i;
3368 int sign; /* 1 if '-', else 0 */
3369 int len; /* number of characters */
3370 int numdigits; /* len == numnondigits + numdigits */
3371 int numnondigits = 0;
3372
3373 switch (type) {
3374 case 'd':
3375 case 'u':
3376 result = val->ob_type->tp_str(val);
3377 break;
3378 case 'o':
3379 result = val->ob_type->tp_as_number->nb_oct(val);
3380 break;
3381 case 'x':
3382 case 'X':
3383 numnondigits = 2;
3384 result = val->ob_type->tp_as_number->nb_hex(val);
3385 break;
3386 default:
3387 assert(!"'type' not in [duoxX]");
3388 }
3389 if (!result)
3390 return NULL;
3391
3392 /* To modify the string in-place, there can only be one reference. */
3393 if (result->ob_refcnt != 1) {
3394 PyErr_BadInternalCall();
3395 return NULL;
3396 }
3397 buf = PyString_AsString(result);
3398 len = PyString_Size(result);
3399 if (buf[len-1] == 'L') {
3400 --len;
3401 buf[len] = '\0';
3402 }
3403 sign = buf[0] == '-';
3404 numnondigits += sign;
3405 numdigits = len - numnondigits;
3406 assert(numdigits > 0);
3407
Tim Petersfff53252001-04-12 18:38:48 +00003408 /* Get rid of base marker unless F_ALT */
3409 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003410 /* Need to skip 0x, 0X or 0. */
3411 int skipped = 0;
3412 switch (type) {
3413 case 'o':
3414 assert(buf[sign] == '0');
3415 /* If 0 is only digit, leave it alone. */
3416 if (numdigits > 1) {
3417 skipped = 1;
3418 --numdigits;
3419 }
3420 break;
3421 case 'x':
3422 case 'X':
3423 assert(buf[sign] == '0');
3424 assert(buf[sign + 1] == 'x');
3425 skipped = 2;
3426 numnondigits -= 2;
3427 break;
3428 }
3429 if (skipped) {
3430 buf += skipped;
3431 len -= skipped;
3432 if (sign)
3433 buf[0] = '-';
3434 }
3435 assert(len == numnondigits + numdigits);
3436 assert(numdigits > 0);
3437 }
3438
3439 /* Fill with leading zeroes to meet minimum width. */
3440 if (prec > numdigits) {
3441 PyObject *r1 = PyString_FromStringAndSize(NULL,
3442 numnondigits + prec);
3443 char *b1;
3444 if (!r1) {
3445 Py_DECREF(result);
3446 return NULL;
3447 }
3448 b1 = PyString_AS_STRING(r1);
3449 for (i = 0; i < numnondigits; ++i)
3450 *b1++ = *buf++;
3451 for (i = 0; i < prec - numdigits; i++)
3452 *b1++ = '0';
3453 for (i = 0; i < numdigits; i++)
3454 *b1++ = *buf++;
3455 *b1 = '\0';
3456 Py_DECREF(result);
3457 result = r1;
3458 buf = PyString_AS_STRING(result);
3459 len = numnondigits + prec;
3460 }
3461
3462 /* Fix up case for hex conversions. */
3463 switch (type) {
3464 case 'x':
3465 /* Need to convert all upper case letters to lower case. */
3466 for (i = 0; i < len; i++)
3467 if (buf[i] >= 'A' && buf[i] <= 'F')
3468 buf[i] += 'a'-'A';
3469 break;
3470 case 'X':
3471 /* Need to convert 0x to 0X (and -0x to -0X). */
3472 if (buf[sign + 1] == 'x')
3473 buf[sign + 1] = 'X';
3474 break;
3475 }
3476 *pbuf = buf;
3477 *plen = len;
3478 return result;
3479}
3480
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003481static int
Fred Drakeba096332000-07-09 07:04:36 +00003482formatint(char *buf, size_t buflen, int flags,
3483 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003484{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003485 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003486 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3487 + 1 + 1 = 24 */
3488 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003489 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003490
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003491 x = PyInt_AsLong(v);
3492 if (x == -1 && PyErr_Occurred()) {
3493 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003494 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003495 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003496 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003497 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003498 "%u/%o/%x/%X of negative int will return "
3499 "a signed string in Python 2.4 and up") < 0)
3500 return -1;
3501 }
Guido van Rossume5372401993-03-16 12:15:04 +00003502 if (prec < 0)
3503 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003504
3505 if ((flags & F_ALT) &&
3506 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003507 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003508 * of issues that cause pain:
3509 * - when 0 is being converted, the C standard leaves off
3510 * the '0x' or '0X', which is inconsistent with other
3511 * %#x/%#X conversions and inconsistent with Python's
3512 * hex() function
3513 * - there are platforms that violate the standard and
3514 * convert 0 with the '0x' or '0X'
3515 * (Metrowerks, Compaq Tru64)
3516 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003517 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003518 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003519 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003520 * We can achieve the desired consistency by inserting our
3521 * own '0x' or '0X' prefix, and substituting %x/%X in place
3522 * of %#x/%#X.
3523 *
3524 * Note that this is the same approach as used in
3525 * formatint() in unicodeobject.c
3526 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003527 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003528 type, prec, type);
3529 }
3530 else {
3531 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003532 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003533 prec, type);
3534 }
3535
Tim Peters38fd5b62000-09-21 05:43:11 +00003536 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003537 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3538 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003539 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003540 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003541 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003542 return -1;
3543 }
Tim Peters885d4572001-11-28 20:27:42 +00003544 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003545 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003546}
3547
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003548static int
Fred Drakeba096332000-07-09 07:04:36 +00003549formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003550{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003551 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003552 if (PyString_Check(v)) {
3553 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003554 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003555 }
3556 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003557 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003558 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003559 }
3560 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003561 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003562}
3563
Guido van Rossum013142a1994-08-30 08:19:36 +00003564
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003574
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003575PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003576PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003577{
3578 char *fmt, *res;
3579 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003580 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003581 PyObject *result, *orig_args;
3582#ifdef Py_USING_UNICODE
3583 PyObject *v, *w;
3584#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003585 PyObject *dict = NULL;
3586 if (format == NULL || !PyString_Check(format) || args == NULL) {
3587 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003588 return NULL;
3589 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003590 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003591 fmt = PyString_AS_STRING(format);
3592 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003593 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003594 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003595 if (result == NULL)
3596 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003597 res = PyString_AsString(result);
3598 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003599 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003600 argidx = 0;
3601 }
3602 else {
3603 arglen = -1;
3604 argidx = -2;
3605 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003606 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003607 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003608 while (--fmtcnt >= 0) {
3609 if (*fmt != '%') {
3610 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003611 rescnt = fmtcnt + 100;
3612 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003613 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003614 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003615 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003616 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003617 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003618 }
3619 *res++ = *fmt++;
3620 }
3621 else {
3622 /* Got a format specifier */
3623 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003624 int width = -1;
3625 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003626 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003627 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003628 PyObject *v = NULL;
3629 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003630 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003631 int sign;
3632 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003633 char formatbuf[FORMATBUFLEN];
3634 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003635#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003636 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003637 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003638#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003639
Guido van Rossumda9c2711996-12-05 21:58:58 +00003640 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003641 if (*fmt == '(') {
3642 char *keystart;
3643 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003644 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003645 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003646
3647 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003648 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003649 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003650 goto error;
3651 }
3652 ++fmt;
3653 --fmtcnt;
3654 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003655 /* Skip over balanced parentheses */
3656 while (pcount > 0 && --fmtcnt >= 0) {
3657 if (*fmt == ')')
3658 --pcount;
3659 else if (*fmt == '(')
3660 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003661 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003662 }
3663 keylen = fmt - keystart - 1;
3664 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003665 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003666 "incomplete format key");
3667 goto error;
3668 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003669 key = PyString_FromStringAndSize(keystart,
3670 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003671 if (key == NULL)
3672 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003673 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003674 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003675 args_owned = 0;
3676 }
3677 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003678 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003679 if (args == NULL) {
3680 goto error;
3681 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003682 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003683 arglen = -1;
3684 argidx = -2;
3685 }
Guido van Rossume5372401993-03-16 12:15:04 +00003686 while (--fmtcnt >= 0) {
3687 switch (c = *fmt++) {
3688 case '-': flags |= F_LJUST; continue;
3689 case '+': flags |= F_SIGN; continue;
3690 case ' ': flags |= F_BLANK; continue;
3691 case '#': flags |= F_ALT; continue;
3692 case '0': flags |= F_ZERO; continue;
3693 }
3694 break;
3695 }
3696 if (c == '*') {
3697 v = getnextarg(args, arglen, &argidx);
3698 if (v == NULL)
3699 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003700 if (!PyInt_Check(v)) {
3701 PyErr_SetString(PyExc_TypeError,
3702 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003703 goto error;
3704 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003705 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003706 if (width < 0) {
3707 flags |= F_LJUST;
3708 width = -width;
3709 }
Guido van Rossume5372401993-03-16 12:15:04 +00003710 if (--fmtcnt >= 0)
3711 c = *fmt++;
3712 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003713 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003714 width = c - '0';
3715 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003716 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003717 if (!isdigit(c))
3718 break;
3719 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003720 PyErr_SetString(
3721 PyExc_ValueError,
3722 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003723 goto error;
3724 }
3725 width = width*10 + (c - '0');
3726 }
3727 }
3728 if (c == '.') {
3729 prec = 0;
3730 if (--fmtcnt >= 0)
3731 c = *fmt++;
3732 if (c == '*') {
3733 v = getnextarg(args, arglen, &argidx);
3734 if (v == NULL)
3735 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003736 if (!PyInt_Check(v)) {
3737 PyErr_SetString(
3738 PyExc_TypeError,
3739 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003740 goto error;
3741 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003742 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003743 if (prec < 0)
3744 prec = 0;
3745 if (--fmtcnt >= 0)
3746 c = *fmt++;
3747 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003748 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003749 prec = c - '0';
3750 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003751 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003752 if (!isdigit(c))
3753 break;
3754 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003755 PyErr_SetString(
3756 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003757 "prec too big");
3758 goto error;
3759 }
3760 prec = prec*10 + (c - '0');
3761 }
3762 }
3763 } /* prec */
3764 if (fmtcnt >= 0) {
3765 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003766 if (--fmtcnt >= 0)
3767 c = *fmt++;
3768 }
3769 }
3770 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003771 PyErr_SetString(PyExc_ValueError,
3772 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003773 goto error;
3774 }
3775 if (c != '%') {
3776 v = getnextarg(args, arglen, &argidx);
3777 if (v == NULL)
3778 goto error;
3779 }
3780 sign = 0;
3781 fill = ' ';
3782 switch (c) {
3783 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003784 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003785 len = 1;
3786 break;
3787 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003788 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003789#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003790 if (PyUnicode_Check(v)) {
3791 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003792 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003793 goto unicode;
3794 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003795#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003796 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003797 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003798 else
3799 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003800 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003801 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003802 if (!PyString_Check(temp)) {
3803 PyErr_SetString(PyExc_TypeError,
3804 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003805 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003806 goto error;
3807 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003808 pbuf = PyString_AS_STRING(temp);
3809 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003810 if (prec >= 0 && len > prec)
3811 len = prec;
3812 break;
3813 case 'i':
3814 case 'd':
3815 case 'u':
3816 case 'o':
3817 case 'x':
3818 case 'X':
3819 if (c == 'i')
3820 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003821 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003822 temp = _PyString_FormatLong(v, flags,
3823 prec, c, &pbuf, &len);
3824 if (!temp)
3825 goto error;
3826 /* unbounded ints can always produce
3827 a sign character! */
3828 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003829 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003830 else {
3831 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003832 len = formatint(pbuf,
3833 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003834 flags, prec, c, v);
3835 if (len < 0)
3836 goto error;
3837 /* only d conversion is signed */
3838 sign = c == 'd';
3839 }
3840 if (flags & F_ZERO)
3841 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003842 break;
3843 case 'e':
3844 case 'E':
3845 case 'f':
3846 case 'g':
3847 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003848 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003849 len = formatfloat(pbuf, sizeof(formatbuf),
3850 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003851 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003852 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003853 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003854 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003855 fill = '0';
3856 break;
3857 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003858 pbuf = formatbuf;
3859 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003860 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003861 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003862 break;
3863 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003864 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003865 "unsupported format character '%c' (0x%x) "
3866 "at index %i",
3867 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003868 goto error;
3869 }
3870 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003871 if (*pbuf == '-' || *pbuf == '+') {
3872 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003873 len--;
3874 }
3875 else if (flags & F_SIGN)
3876 sign = '+';
3877 else if (flags & F_BLANK)
3878 sign = ' ';
3879 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003880 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003881 }
3882 if (width < len)
3883 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003884 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003885 reslen -= rescnt;
3886 rescnt = width + fmtcnt + 100;
3887 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003888 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003889 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003890 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003891 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003892 }
3893 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003894 if (fill != ' ')
3895 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003896 rescnt--;
3897 if (width > len)
3898 width--;
3899 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003900 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3901 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003902 assert(pbuf[1] == c);
3903 if (fill != ' ') {
3904 *res++ = *pbuf++;
3905 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003906 }
Tim Petersfff53252001-04-12 18:38:48 +00003907 rescnt -= 2;
3908 width -= 2;
3909 if (width < 0)
3910 width = 0;
3911 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003912 }
3913 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003914 do {
3915 --rescnt;
3916 *res++ = fill;
3917 } while (--width > len);
3918 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003919 if (fill == ' ') {
3920 if (sign)
3921 *res++ = sign;
3922 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003923 (c == 'x' || c == 'X')) {
3924 assert(pbuf[0] == '0');
3925 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003926 *res++ = *pbuf++;
3927 *res++ = *pbuf++;
3928 }
3929 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003930 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003931 res += len;
3932 rescnt -= len;
3933 while (--width >= len) {
3934 --rescnt;
3935 *res++ = ' ';
3936 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003937 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003938 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003939 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003940 goto error;
3941 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003942 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003943 } /* '%' */
3944 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003945 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003946 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003947 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003948 goto error;
3949 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003950 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003951 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003952 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003953 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003954 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003955
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003956#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003957 unicode:
3958 if (args_owned) {
3959 Py_DECREF(args);
3960 args_owned = 0;
3961 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003962 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003963 if (PyTuple_Check(orig_args) && argidx > 0) {
3964 PyObject *v;
3965 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3966 v = PyTuple_New(n);
3967 if (v == NULL)
3968 goto error;
3969 while (--n >= 0) {
3970 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3971 Py_INCREF(w);
3972 PyTuple_SET_ITEM(v, n, w);
3973 }
3974 args = v;
3975 } else {
3976 Py_INCREF(orig_args);
3977 args = orig_args;
3978 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003979 args_owned = 1;
3980 /* Take what we have of the result and let the Unicode formatting
3981 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003982 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003983 if (_PyString_Resize(&result, rescnt))
3984 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003985 fmtcnt = PyString_GET_SIZE(format) - \
3986 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003987 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3988 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003989 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003990 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003991 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003992 if (v == NULL)
3993 goto error;
3994 /* Paste what we have (result) to what the Unicode formatting
3995 function returned (v) and return the result (or error) */
3996 w = PyUnicode_Concat(result, v);
3997 Py_DECREF(result);
3998 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003999 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004000 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004001#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004002
Guido van Rossume5372401993-03-16 12:15:04 +00004003 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004004 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004005 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004006 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004007 }
Guido van Rossume5372401993-03-16 12:15:04 +00004008 return NULL;
4009}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004010
Guido van Rossum2a61e741997-01-18 07:55:05 +00004011void
Fred Drakeba096332000-07-09 07:04:36 +00004012PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004013{
4014 register PyStringObject *s = (PyStringObject *)(*p);
4015 PyObject *t;
4016 if (s == NULL || !PyString_Check(s))
4017 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004018 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004019 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004020 if (interned == NULL) {
4021 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004022 if (interned == NULL) {
4023 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004024 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004025 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004026 }
4027 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4028 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004029 Py_DECREF(*p);
4030 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004031 return;
4032 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004033 /* Ensure that only true string objects appear in the intern dict */
4034 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004035 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4036 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004037 if (t == NULL) {
4038 PyErr_Clear();
4039 return;
Tim Peters111f6092001-09-12 07:54:51 +00004040 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004041 } else {
4042 t = (PyObject*) s;
4043 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004044 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004045
4046 if (PyDict_SetItem(interned, t, t) == 0) {
4047 /* The two references in interned are not counted by
4048 refcnt. The string deallocator will take care of this */
4049 ((PyObject *)t)->ob_refcnt-=2;
4050 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4051 Py_DECREF(*p);
4052 *p = t;
4053 return;
4054 }
4055 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004056 PyErr_Clear();
4057}
4058
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004059void
4060PyString_InternImmortal(PyObject **p)
4061{
4062 PyString_InternInPlace(p);
4063 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4064 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4065 Py_INCREF(*p);
4066 }
4067}
4068
Guido van Rossum2a61e741997-01-18 07:55:05 +00004069
4070PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004071PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004072{
4073 PyObject *s = PyString_FromString(cp);
4074 if (s == NULL)
4075 return NULL;
4076 PyString_InternInPlace(&s);
4077 return s;
4078}
4079
Guido van Rossum8cf04761997-08-02 02:57:45 +00004080void
Fred Drakeba096332000-07-09 07:04:36 +00004081PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004082{
4083 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004084 for (i = 0; i < UCHAR_MAX + 1; i++) {
4085 Py_XDECREF(characters[i]);
4086 characters[i] = NULL;
4087 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004088 Py_XDECREF(nullstring);
4089 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004090}
Barry Warsawa903ad982001-02-23 16:40:48 +00004091
Barry Warsawa903ad982001-02-23 16:40:48 +00004092void _Py_ReleaseInternedStrings(void)
4093{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004094 PyObject *keys;
4095 PyStringObject *s;
4096 int i, n;
4097
4098 if (interned == NULL || !PyDict_Check(interned))
4099 return;
4100 keys = PyDict_Keys(interned);
4101 if (keys == NULL || !PyList_Check(keys)) {
4102 PyErr_Clear();
4103 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004104 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004105
4106 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4107 detector, interned strings are not forcibly deallocated; rather, we
4108 give them their stolen references back, and then clear and DECREF
4109 the interned dict. */
4110
4111 fprintf(stderr, "releasing interned strings\n");
4112 n = PyList_GET_SIZE(keys);
4113 for (i = 0; i < n; i++) {
4114 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4115 switch (s->ob_sstate) {
4116 case SSTATE_NOT_INTERNED:
4117 /* XXX Shouldn't happen */
4118 break;
4119 case SSTATE_INTERNED_IMMORTAL:
4120 s->ob_refcnt += 1;
4121 break;
4122 case SSTATE_INTERNED_MORTAL:
4123 s->ob_refcnt += 2;
4124 break;
4125 default:
4126 Py_FatalError("Inconsistent interned string state.");
4127 }
4128 s->ob_sstate = SSTATE_NOT_INTERNED;
4129 }
4130 Py_DECREF(keys);
4131 PyDict_Clear(interned);
4132 Py_DECREF(interned);
4133 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004134}