blob: 31f188a5b9e8ae5767dee7301aaf95e1a350c855 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
26static PyObject *interned;
27
28
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
536 v = PyString_FromStringAndSize((char *)NULL,
537 recode_encoding ? 4*len:len);
538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
544#ifdef Py_USING_UNICODE
545 if (recode_encoding && (*s & 0x80)) {
546 PyObject *u, *w;
547 char *r;
548 const char* t;
549 int rn;
550 t = s;
551 /* Decode non-ASCII bytes as UTF-8. */
552 while (t < end && (*t & 0x80)) t++;
553 u = PyUnicode_DecodeUTF8(s, t - s, errors);
554 if(!u) goto failed;
555
556 /* Recode them in target encoding. */
557 w = PyUnicode_AsEncodedString(
558 u, recode_encoding, errors);
559 Py_DECREF(u);
560 if (!w) goto failed;
561
562 /* Append bytes to output buffer. */
563 r = PyString_AsString(w);
564 rn = PyString_Size(w);
565 memcpy(p, r, rn);
566 p += rn;
567 Py_DECREF(w);
568 s = t;
569 } else {
570 *p++ = *s++;
571 }
572#else
573 *p++ = *s++;
574#endif
575 continue;
576 }
577 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000578 if (s==end) {
579 PyErr_SetString(PyExc_ValueError,
580 "Trailing \\ in string");
581 goto failed;
582 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000583 switch (*s++) {
584 /* XXX This assumes ASCII! */
585 case '\n': break;
586 case '\\': *p++ = '\\'; break;
587 case '\'': *p++ = '\''; break;
588 case '\"': *p++ = '\"'; break;
589 case 'b': *p++ = '\b'; break;
590 case 'f': *p++ = '\014'; break; /* FF */
591 case 't': *p++ = '\t'; break;
592 case 'n': *p++ = '\n'; break;
593 case 'r': *p++ = '\r'; break;
594 case 'v': *p++ = '\013'; break; /* VT */
595 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
596 case '0': case '1': case '2': case '3':
597 case '4': case '5': case '6': case '7':
598 c = s[-1] - '0';
599 if ('0' <= *s && *s <= '7') {
600 c = (c<<3) + *s++ - '0';
601 if ('0' <= *s && *s <= '7')
602 c = (c<<3) + *s++ - '0';
603 }
604 *p++ = c;
605 break;
606 case 'x':
607 if (isxdigit(Py_CHARMASK(s[0]))
608 && isxdigit(Py_CHARMASK(s[1]))) {
609 unsigned int x = 0;
610 c = Py_CHARMASK(*s);
611 s++;
612 if (isdigit(c))
613 x = c - '0';
614 else if (islower(c))
615 x = 10 + c - 'a';
616 else
617 x = 10 + c - 'A';
618 x = x << 4;
619 c = Py_CHARMASK(*s);
620 s++;
621 if (isdigit(c))
622 x += c - '0';
623 else if (islower(c))
624 x += 10 + c - 'a';
625 else
626 x += 10 + c - 'A';
627 *p++ = x;
628 break;
629 }
630 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 PyErr_SetString(PyExc_ValueError,
632 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000633 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 }
635 if (strcmp(errors, "replace") == 0) {
636 *p++ = '?';
637 } else if (strcmp(errors, "ignore") == 0)
638 /* do nothing */;
639 else {
640 PyErr_Format(PyExc_ValueError,
641 "decoding error; "
642 "unknown error handling code: %.400s",
643 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000644 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000645 }
646#ifndef Py_USING_UNICODE
647 case 'u':
648 case 'U':
649 case 'N':
650 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "Unicode escapes not legal "
653 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656#endif
657 default:
658 *p++ = '\\';
659 *p++ = s[-1];
660 break;
661 }
662 }
663 _PyString_Resize(&v, (int)(p - buf));
664 return v;
665 failed:
666 Py_DECREF(v);
667 return NULL;
668}
669
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000670static int
671string_getsize(register PyObject *op)
672{
673 char *s;
674 int len;
675 if (PyString_AsStringAndSize(op, &s, &len))
676 return -1;
677 return len;
678}
679
680static /*const*/ char *
681string_getbuffer(register PyObject *op)
682{
683 char *s;
684 int len;
685 if (PyString_AsStringAndSize(op, &s, &len))
686 return NULL;
687 return s;
688}
689
Guido van Rossumd7047b31995-01-02 19:07:15 +0000690int
Fred Drakeba096332000-07-09 07:04:36 +0000691PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000692{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000693 if (!PyString_Check(op))
694 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000695 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696}
697
698/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000699PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000701 if (!PyString_Check(op))
702 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000703 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704}
705
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706int
707PyString_AsStringAndSize(register PyObject *obj,
708 register char **s,
709 register int *len)
710{
711 if (s == NULL) {
712 PyErr_BadInternalCall();
713 return -1;
714 }
715
716 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000717#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (PyUnicode_Check(obj)) {
719 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
720 if (obj == NULL)
721 return -1;
722 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000723 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000724#endif
725 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 PyErr_Format(PyExc_TypeError,
727 "expected string or Unicode object, "
728 "%.200s found", obj->ob_type->tp_name);
729 return -1;
730 }
731 }
732
733 *s = PyString_AS_STRING(obj);
734 if (len != NULL)
735 *len = PyString_GET_SIZE(obj);
736 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
737 PyErr_SetString(PyExc_TypeError,
738 "expected string without null bytes");
739 return -1;
740 }
741 return 0;
742}
743
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000744/* Methods */
745
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000746static int
Fred Drakeba096332000-07-09 07:04:36 +0000747string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748{
749 int i;
750 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000751 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000752
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000753 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000754 if (! PyString_CheckExact(op)) {
755 int ret;
756 /* A str subclass may have its own __str__ method. */
757 op = (PyStringObject *) PyObject_Str((PyObject *)op);
758 if (op == NULL)
759 return -1;
760 ret = string_print(op, fp, flags);
761 Py_DECREF(op);
762 return ret;
763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000764 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000765 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000766 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000767 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000768
Thomas Wouters7e474022000-07-16 12:04:32 +0000769 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000770 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000771 if (memchr(op->ob_sval, '\'', op->ob_size) &&
772 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 quote = '"';
774
775 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000776 for (i = 0; i < op->ob_size; i++) {
777 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000778 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000780 else if (c == '\t')
781 fprintf(fp, "\\t");
782 else if (c == '\n')
783 fprintf(fp, "\\n");
784 else if (c == '\r')
785 fprintf(fp, "\\r");
786 else if (c < ' ' || c >= 0x7f)
787 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000789 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000792 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793}
794
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000795PyObject *
796PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000798 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000799 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
800 PyObject *v;
801 if (newsize > INT_MAX) {
802 PyErr_SetString(PyExc_OverflowError,
803 "string is too large to make repr");
804 }
805 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000806 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000807 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 }
809 else {
810 register int i;
811 register char c;
812 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000813 int quote;
814
Thomas Wouters7e474022000-07-16 12:04:32 +0000815 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000817 if (smartquotes &&
818 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000819 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 quote = '"';
821
Tim Peters9161c8b2001-12-03 01:55:38 +0000822 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000824 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000825 /* There's at least enough room for a hex escape
826 and a closing quote. */
827 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000831 else if (c == '\t')
832 *p++ = '\\', *p++ = 't';
833 else if (c == '\n')
834 *p++ = '\\', *p++ = 'n';
835 else if (c == '\r')
836 *p++ = '\\', *p++ = 'r';
837 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000838 /* For performance, we don't want to call
839 PyOS_snprintf here (extra layers of
840 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000841 sprintf(p, "\\x%02x", c & 0xff);
842 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843 }
844 else
845 *p++ = c;
846 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000847 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000848 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000850 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000851 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000852 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854}
855
Guido van Rossum189f1df2001-05-01 16:51:53 +0000856static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000857string_repr(PyObject *op)
858{
859 return PyString_Repr(op, 1);
860}
861
862static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863string_str(PyObject *s)
864{
Tim Petersc9933152001-10-16 20:18:24 +0000865 assert(PyString_Check(s));
866 if (PyString_CheckExact(s)) {
867 Py_INCREF(s);
868 return s;
869 }
870 else {
871 /* Subtype -- return genuine string with the same value. */
872 PyStringObject *t = (PyStringObject *) s;
873 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
874 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000875}
876
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877static int
Fred Drakeba096332000-07-09 07:04:36 +0000878string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879{
880 return a->ob_size;
881}
882
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000883static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000884string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000885{
886 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000887 register PyStringObject *op;
888 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000889#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000890 if (PyUnicode_Check(bb))
891 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000892#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000893 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000894 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000895 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 return NULL;
897 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000898#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000900 if ((a->ob_size == 0 || b->ob_size == 0) &&
901 PyString_CheckExact(a) && PyString_CheckExact(b)) {
902 if (a->ob_size == 0) {
903 Py_INCREF(bb);
904 return bb;
905 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000906 Py_INCREF(a);
907 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000908 }
909 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000910 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000911 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000912 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000915 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000917 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
919 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
920 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922#undef b
923}
924
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000926string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927{
928 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000929 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000931 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000932 if (n < 0)
933 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000934 /* watch out for overflows: the size can overflow int,
935 * and the # of bytes needed can overflow size_t
936 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000938 if (n && size / n != a->ob_size) {
939 PyErr_SetString(PyExc_OverflowError,
940 "repeated string is too long");
941 return NULL;
942 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000943 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944 Py_INCREF(a);
945 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 }
Tim Peters8f422462000-09-09 06:13:41 +0000947 nbytes = size * sizeof(char);
948 if (nbytes / sizeof(char) != (size_t)size ||
949 nbytes + sizeof(PyStringObject) <= nbytes) {
950 PyErr_SetString(PyExc_OverflowError,
951 "repeated string is too long");
952 return NULL;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000955 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000961 for (i = 0; i < size; i += a->ob_size)
962 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
963 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965}
966
967/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
968
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000970string_slice(register PyStringObject *a, register int i, register int j)
971 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000972{
973 if (i < 0)
974 i = 0;
975 if (j < 0)
976 j = 0; /* Avoid signed/unsigned bug in next line */
977 if (j > a->ob_size)
978 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000979 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
980 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981 Py_INCREF(a);
982 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000983 }
984 if (j < i)
985 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000986 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987}
988
Guido van Rossum9284a572000-03-07 15:53:43 +0000989static int
Fred Drakeba096332000-07-09 07:04:36 +0000990string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000991{
Barry Warsaw817918c2002-08-06 16:58:21 +0000992 const char *lhs, *rhs, *end;
993 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000994
995 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000996#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000997 if (PyUnicode_Check(el))
998 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000999#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001000 if (!PyString_Check(el)) {
1001 PyErr_SetString(PyExc_TypeError,
1002 "'in <string>' requires string as left operand");
1003 return -1;
1004 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001005 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001006 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001007 rhs = PyString_AS_STRING(el);
1008 lhs = PyString_AS_STRING(a);
1009
1010 /* optimize for a single character */
1011 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001012 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001013
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001015 while (lhs <= end) {
1016 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001017 return 1;
1018 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001019
Guido van Rossum9284a572000-03-07 15:53:43 +00001020 return 0;
1021}
1022
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001024string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001025{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001026 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001027 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001029 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001030 return NULL;
1031 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001032 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001033 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001034 if (v == NULL)
1035 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001036 else {
1037#ifdef COUNT_ALLOCS
1038 one_strings++;
1039#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001040 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001041 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001042 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001043}
1044
Martin v. Löwiscd353062001-05-24 16:56:35 +00001045static PyObject*
1046string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001047{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001048 int c;
1049 int len_a, len_b;
1050 int min_len;
1051 PyObject *result;
1052
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001053 /* Make sure both arguments are strings. */
1054 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001055 result = Py_NotImplemented;
1056 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001057 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001058 if (a == b) {
1059 switch (op) {
1060 case Py_EQ:case Py_LE:case Py_GE:
1061 result = Py_True;
1062 goto out;
1063 case Py_NE:case Py_LT:case Py_GT:
1064 result = Py_False;
1065 goto out;
1066 }
1067 }
1068 if (op == Py_EQ) {
1069 /* Supporting Py_NE here as well does not save
1070 much time, since Py_NE is rarely used. */
1071 if (a->ob_size == b->ob_size
1072 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001073 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074 a->ob_size) == 0)) {
1075 result = Py_True;
1076 } else {
1077 result = Py_False;
1078 }
1079 goto out;
1080 }
1081 len_a = a->ob_size; len_b = b->ob_size;
1082 min_len = (len_a < len_b) ? len_a : len_b;
1083 if (min_len > 0) {
1084 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1085 if (c==0)
1086 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1087 }else
1088 c = 0;
1089 if (c == 0)
1090 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1091 switch (op) {
1092 case Py_LT: c = c < 0; break;
1093 case Py_LE: c = c <= 0; break;
1094 case Py_EQ: assert(0); break; /* unreachable */
1095 case Py_NE: c = c != 0; break;
1096 case Py_GT: c = c > 0; break;
1097 case Py_GE: c = c >= 0; break;
1098 default:
1099 result = Py_NotImplemented;
1100 goto out;
1101 }
1102 result = c ? Py_True : Py_False;
1103 out:
1104 Py_INCREF(result);
1105 return result;
1106}
1107
1108int
1109_PyString_Eq(PyObject *o1, PyObject *o2)
1110{
1111 PyStringObject *a, *b;
1112 a = (PyStringObject*)o1;
1113 b = (PyStringObject*)o2;
1114 return a->ob_size == b->ob_size
1115 && *a->ob_sval == *b->ob_sval
1116 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001117}
1118
Guido van Rossum9bfef441993-03-29 10:43:31 +00001119static long
Fred Drakeba096332000-07-09 07:04:36 +00001120string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001121{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001122 register int len;
1123 register unsigned char *p;
1124 register long x;
1125
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001126 if (a->ob_shash != -1)
1127 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001128 len = a->ob_size;
1129 p = (unsigned char *) a->ob_sval;
1130 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001131 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001132 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001133 x ^= a->ob_size;
1134 if (x == -1)
1135 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001136 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001137 return x;
1138}
1139
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001140static PyObject*
1141string_subscript(PyStringObject* self, PyObject* item)
1142{
1143 if (PyInt_Check(item)) {
1144 long i = PyInt_AS_LONG(item);
1145 if (i < 0)
1146 i += PyString_GET_SIZE(self);
1147 return string_item(self,i);
1148 }
1149 else if (PyLong_Check(item)) {
1150 long i = PyLong_AsLong(item);
1151 if (i == -1 && PyErr_Occurred())
1152 return NULL;
1153 if (i < 0)
1154 i += PyString_GET_SIZE(self);
1155 return string_item(self,i);
1156 }
1157 else if (PySlice_Check(item)) {
1158 int start, stop, step, slicelength, cur, i;
1159 char* source_buf;
1160 char* result_buf;
1161 PyObject* result;
1162
1163 if (PySlice_GetIndicesEx((PySliceObject*)item,
1164 PyString_GET_SIZE(self),
1165 &start, &stop, &step, &slicelength) < 0) {
1166 return NULL;
1167 }
1168
1169 if (slicelength <= 0) {
1170 return PyString_FromStringAndSize("", 0);
1171 }
1172 else {
1173 source_buf = PyString_AsString((PyObject*)self);
1174 result_buf = PyMem_Malloc(slicelength);
1175
1176 for (cur = start, i = 0; i < slicelength;
1177 cur += step, i++) {
1178 result_buf[i] = source_buf[cur];
1179 }
1180
1181 result = PyString_FromStringAndSize(result_buf,
1182 slicelength);
1183 PyMem_Free(result_buf);
1184 return result;
1185 }
1186 }
1187 else {
1188 PyErr_SetString(PyExc_TypeError,
1189 "string indices must be integers");
1190 return NULL;
1191 }
1192}
1193
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001194static int
Fred Drakeba096332000-07-09 07:04:36 +00001195string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001196{
1197 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001198 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001199 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001200 return -1;
1201 }
1202 *ptr = (void *)self->ob_sval;
1203 return self->ob_size;
1204}
1205
1206static int
Fred Drakeba096332000-07-09 07:04:36 +00001207string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001208{
Guido van Rossum045e6881997-09-08 18:30:11 +00001209 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001210 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001211 return -1;
1212}
1213
1214static int
Fred Drakeba096332000-07-09 07:04:36 +00001215string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216{
1217 if ( lenp )
1218 *lenp = self->ob_size;
1219 return 1;
1220}
1221
Guido van Rossum1db70701998-10-08 02:18:52 +00001222static int
Fred Drakeba096332000-07-09 07:04:36 +00001223string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001224{
1225 if ( index != 0 ) {
1226 PyErr_SetString(PyExc_SystemError,
1227 "accessing non-existent string segment");
1228 return -1;
1229 }
1230 *ptr = self->ob_sval;
1231 return self->ob_size;
1232}
1233
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001234static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001235 (inquiry)string_length, /*sq_length*/
1236 (binaryfunc)string_concat, /*sq_concat*/
1237 (intargfunc)string_repeat, /*sq_repeat*/
1238 (intargfunc)string_item, /*sq_item*/
1239 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001240 0, /*sq_ass_item*/
1241 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001242 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001243};
1244
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001245static PyMappingMethods string_as_mapping = {
1246 (inquiry)string_length,
1247 (binaryfunc)string_subscript,
1248 0,
1249};
1250
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001251static PyBufferProcs string_as_buffer = {
1252 (getreadbufferproc)string_buffer_getreadbuf,
1253 (getwritebufferproc)string_buffer_getwritebuf,
1254 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001255 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001256};
1257
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001258
1259
/* Selectors naming which end(s) of the string a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, indexed by the selectors above. */
static const char *stripformat[] = {
	"|O:lstrip",
	"|O:rstrip",
	"|O:strip"
};

/* The format string for selector i with its first three characters
   ("|O:") skipped, i.e. just the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001268
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001269
1270static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001271split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001273 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001274 PyObject* item;
1275 PyObject *list = PyList_New(0);
1276
1277 if (list == NULL)
1278 return NULL;
1279
Guido van Rossum4c08d552000-03-10 22:55:18 +00001280 for (i = j = 0; i < len; ) {
1281 while (i < len && isspace(Py_CHARMASK(s[i])))
1282 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001283 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001284 while (i < len && !isspace(Py_CHARMASK(s[i])))
1285 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001287 if (maxsplit-- <= 0)
1288 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1290 if (item == NULL)
1291 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292 err = PyList_Append(list, item);
1293 Py_DECREF(item);
1294 if (err < 0)
1295 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 while (i < len && isspace(Py_CHARMASK(s[i])))
1297 i++;
1298 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 }
1300 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301 if (j < len) {
1302 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1303 if (item == NULL)
1304 goto finally;
1305 err = PyList_Append(list, item);
1306 Py_DECREF(item);
1307 if (err < 0)
1308 goto finally;
1309 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 return list;
1311 finally:
1312 Py_DECREF(list);
1313 return NULL;
1314}
1315
1316
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001317PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318"S.split([sep [,maxsplit]]) -> list of strings\n\
1319\n\
1320Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001322splits are done. If sep is not specified or is None, any\n\
1323whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324
1325static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001326string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327{
1328 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001329 int maxsplit = -1;
1330 const char *s = PyString_AS_STRING(self), *sub;
1331 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 if (maxsplit < 0)
1336 maxsplit = INT_MAX;
1337 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001339 if (PyString_Check(subobj)) {
1340 sub = PyString_AS_STRING(subobj);
1341 n = PyString_GET_SIZE(subobj);
1342 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001343#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 else if (PyUnicode_Check(subobj))
1345 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001346#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001347 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1348 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 if (n == 0) {
1350 PyErr_SetString(PyExc_ValueError, "empty separator");
1351 return NULL;
1352 }
1353
1354 list = PyList_New(0);
1355 if (list == NULL)
1356 return NULL;
1357
1358 i = j = 0;
1359 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001360 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361 if (maxsplit-- <= 0)
1362 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1364 if (item == NULL)
1365 goto fail;
1366 err = PyList_Append(list, item);
1367 Py_DECREF(item);
1368 if (err < 0)
1369 goto fail;
1370 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371 }
1372 else
1373 i++;
1374 }
1375 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1376 if (item == NULL)
1377 goto fail;
1378 err = PyList_Append(list, item);
1379 Py_DECREF(item);
1380 if (err < 0)
1381 goto fail;
1382
1383 return list;
1384
1385 fail:
1386 Py_DECREF(list);
1387 return NULL;
1388}
1389
1390
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001391PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001392"S.join(sequence) -> string\n\
1393\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001394Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001395sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396
1397static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001398string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399{
1400 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001401 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 char *p;
1404 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001405 size_t sz = 0;
1406 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001407 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408
Tim Peters19fe14e2001-01-19 03:03:47 +00001409 seq = PySequence_Fast(orig, "");
1410 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001411 if (PyErr_ExceptionMatches(PyExc_TypeError))
1412 PyErr_Format(PyExc_TypeError,
1413 "sequence expected, %.80s found",
1414 orig->ob_type->tp_name);
1415 return NULL;
1416 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001417
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001418 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001419 if (seqlen == 0) {
1420 Py_DECREF(seq);
1421 return PyString_FromString("");
1422 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001424 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001425 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1426 PyErr_Format(PyExc_TypeError,
1427 "sequence item 0: expected string,"
1428 " %.80s found",
1429 item->ob_type->tp_name);
1430 Py_DECREF(seq);
1431 return NULL;
1432 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001433 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001434 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001435 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001437
Tim Peters19fe14e2001-01-19 03:03:47 +00001438 /* There are at least two things to join. Do a pre-pass to figure out
1439 * the total amount of space we'll need (sz), see whether any argument
1440 * is absurd, and defer to the Unicode join if appropriate.
1441 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001442 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001443 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001444 item = PySequence_Fast_GET_ITEM(seq, i);
1445 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001446#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001447 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001448 /* Defer to Unicode join.
1449 * CAUTION: There's no gurantee that the
1450 * original sequence can be iterated over
1451 * again, so we must pass seq here.
1452 */
1453 PyObject *result;
1454 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001455 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001456 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001457 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001458#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001459 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001460 "sequence item %i: expected string,"
1461 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001463 Py_DECREF(seq);
1464 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001465 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001466 sz += PyString_GET_SIZE(item);
1467 if (i != 0)
1468 sz += seplen;
1469 if (sz < old_sz || sz > INT_MAX) {
1470 PyErr_SetString(PyExc_OverflowError,
1471 "join() is too long for a Python string");
1472 Py_DECREF(seq);
1473 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001475 }
1476
1477 /* Allocate result space. */
1478 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1479 if (res == NULL) {
1480 Py_DECREF(seq);
1481 return NULL;
1482 }
1483
1484 /* Catenate everything. */
1485 p = PyString_AS_STRING(res);
1486 for (i = 0; i < seqlen; ++i) {
1487 size_t n;
1488 item = PySequence_Fast_GET_ITEM(seq, i);
1489 n = PyString_GET_SIZE(item);
1490 memcpy(p, PyString_AS_STRING(item), n);
1491 p += n;
1492 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001493 memcpy(p, sep, seplen);
1494 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001495 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001497
Jeremy Hylton49048292000-07-11 03:28:17 +00001498 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500}
1501
Tim Peters52e155e2001-06-16 05:42:57 +00001502PyObject *
1503_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001504{
Tim Petersa7259592001-06-16 05:11:17 +00001505 assert(sep != NULL && PyString_Check(sep));
1506 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001507 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001508}
1509
/* Normalize a start/end index pair in place for a string of length len.

   *end is capped at len; a negative *end has len added to it and is then
   raised to 0 if still negative.  A negative *start gets the same add-then-
   clamp treatment.  A *start above len is left unchanged. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int hi = *end;
	int lo;

	if (hi > len)
		hi = len;
	else if (hi < 0)
		hi += len;
	if (hi < 0)
		hi = 0;
	*end = hi;

	lo = *start;
	if (lo < 0) {
		lo += len;
		if (lo < 0)
			lo = 0;
	}
	*start = lo;
}
1524
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525static long
Fred Drakeba096332000-07-09 07:04:36 +00001526string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001528 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001529 int len = PyString_GET_SIZE(self);
1530 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001531 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001533 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001534 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535 return -2;
1536 if (PyString_Check(subobj)) {
1537 sub = PyString_AS_STRING(subobj);
1538 n = PyString_GET_SIZE(subobj);
1539 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001540#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001541 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001542 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001543#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001544 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545 return -2;
1546
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001547 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 if (dir > 0) {
1550 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 last -= n;
1553 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001554 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 return (long)i;
1556 }
1557 else {
1558 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001559
Guido van Rossum4c08d552000-03-10 22:55:18 +00001560 if (n == 0 && i <= last)
1561 return (long)last;
1562 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001563 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 return (long)j;
1565 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001566
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567 return -1;
1568}
1569
1570
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001571PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572"S.find(sub [,start [,end]]) -> int\n\
1573\n\
1574Return the lowest index in S where substring sub is found,\n\
1575such that sub is contained within s[start,end]. Optional\n\
1576arguments start and end are interpreted as in slice notation.\n\
1577\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001578Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579
1580static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001581string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001583 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001584 if (result == -2)
1585 return NULL;
1586 return PyInt_FromLong(result);
1587}
1588
1589
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001590PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591"S.index(sub [,start [,end]]) -> int\n\
1592\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001593Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594
1595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001596string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001598 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 if (result == -2)
1600 return NULL;
1601 if (result == -1) {
1602 PyErr_SetString(PyExc_ValueError,
1603 "substring not found in string.index");
1604 return NULL;
1605 }
1606 return PyInt_FromLong(result);
1607}
1608
1609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001610PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611"S.rfind(sub [,start [,end]]) -> int\n\
1612\n\
1613Return the highest index in S where substring sub is found,\n\
1614such that sub is contained within s[start,end]. Optional\n\
1615arguments start and end are interpreted as in slice notation.\n\
1616\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001617Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618
1619static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001620string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 if (result == -2)
1624 return NULL;
1625 return PyInt_FromLong(result);
1626}
1627
1628
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001629PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630"S.rindex(sub [,start [,end]]) -> int\n\
1631\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001632Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633
1634static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001635string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001637 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 if (result == -2)
1639 return NULL;
1640 if (result == -1) {
1641 PyErr_SetString(PyExc_ValueError,
1642 "substring not found in string.rindex");
1643 return NULL;
1644 }
1645 return PyInt_FromLong(result);
1646}
1647
1648
1649static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001650do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1651{
1652 char *s = PyString_AS_STRING(self);
1653 int len = PyString_GET_SIZE(self);
1654 char *sep = PyString_AS_STRING(sepobj);
1655 int seplen = PyString_GET_SIZE(sepobj);
1656 int i, j;
1657
1658 i = 0;
1659 if (striptype != RIGHTSTRIP) {
1660 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1661 i++;
1662 }
1663 }
1664
1665 j = len;
1666 if (striptype != LEFTSTRIP) {
1667 do {
1668 j--;
1669 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1670 j++;
1671 }
1672
1673 if (i == 0 && j == len && PyString_CheckExact(self)) {
1674 Py_INCREF(self);
1675 return (PyObject*)self;
1676 }
1677 else
1678 return PyString_FromStringAndSize(s+i, j-i);
1679}
1680
1681
1682static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001683do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684{
1685 char *s = PyString_AS_STRING(self);
1686 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688 i = 0;
1689 if (striptype != RIGHTSTRIP) {
1690 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1691 i++;
1692 }
1693 }
1694
1695 j = len;
1696 if (striptype != LEFTSTRIP) {
1697 do {
1698 j--;
1699 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1700 j++;
1701 }
1702
Tim Peters8fa5dd02001-09-12 02:18:30 +00001703 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704 Py_INCREF(self);
1705 return (PyObject*)self;
1706 }
1707 else
1708 return PyString_FromStringAndSize(s+i, j-i);
1709}
1710
1711
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001712static PyObject *
1713do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1714{
1715 PyObject *sep = NULL;
1716
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001717 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001718 return NULL;
1719
1720 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001721 if (PyString_Check(sep))
1722 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001723#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001724 else if (PyUnicode_Check(sep)) {
1725 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1726 PyObject *res;
1727 if (uniself==NULL)
1728 return NULL;
1729 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1730 striptype, sep);
1731 Py_DECREF(uniself);
1732 return res;
1733 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001734#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001735 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001736 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001737#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001738 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001739#else
1740 "%s arg must be None or str",
1741#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001742 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001743 return NULL;
1744 }
1745 return do_xstrip(self, striptype, sep);
1746 }
1747
1748 return do_strip(self, striptype);
1749}
1750
1751
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001752PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001753"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754\n\
1755Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001756whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001757If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001758If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759
1760static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001761string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001763 if (PyTuple_GET_SIZE(args) == 0)
1764 return do_strip(self, BOTHSTRIP); /* Common case */
1765 else
1766 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767}
1768
1769
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001770PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001771"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001773Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001774If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776
1777static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001778string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001780 if (PyTuple_GET_SIZE(args) == 0)
1781 return do_strip(self, LEFTSTRIP); /* Common case */
1782 else
1783 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784}
1785
1786
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001787PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001788"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001790Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001791If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001792If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793
1794static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001795string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001797 if (PyTuple_GET_SIZE(args) == 0)
1798 return do_strip(self, RIGHTSTRIP); /* Common case */
1799 else
1800 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801}
1802
1803
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001804PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805"S.lower() -> string\n\
1806\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001807Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808
1809static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001810string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811{
1812 char *s = PyString_AS_STRING(self), *s_new;
1813 int i, n = PyString_GET_SIZE(self);
1814 PyObject *new;
1815
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816 new = PyString_FromStringAndSize(NULL, n);
1817 if (new == NULL)
1818 return NULL;
1819 s_new = PyString_AsString(new);
1820 for (i = 0; i < n; i++) {
1821 int c = Py_CHARMASK(*s++);
1822 if (isupper(c)) {
1823 *s_new = tolower(c);
1824 } else
1825 *s_new = c;
1826 s_new++;
1827 }
1828 return new;
1829}
1830
1831
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001832PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001833"S.upper() -> string\n\
1834\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001835Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836
1837static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001838string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839{
1840 char *s = PyString_AS_STRING(self), *s_new;
1841 int i, n = PyString_GET_SIZE(self);
1842 PyObject *new;
1843
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844 new = PyString_FromStringAndSize(NULL, n);
1845 if (new == NULL)
1846 return NULL;
1847 s_new = PyString_AsString(new);
1848 for (i = 0; i < n; i++) {
1849 int c = Py_CHARMASK(*s++);
1850 if (islower(c)) {
1851 *s_new = toupper(c);
1852 } else
1853 *s_new = c;
1854 s_new++;
1855 }
1856 return new;
1857}
1858
1859
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001860PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001861"S.title() -> string\n\
1862\n\
1863Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001864characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865
1866static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001867string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868{
1869 char *s = PyString_AS_STRING(self), *s_new;
1870 int i, n = PyString_GET_SIZE(self);
1871 int previous_is_cased = 0;
1872 PyObject *new;
1873
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 new = PyString_FromStringAndSize(NULL, n);
1875 if (new == NULL)
1876 return NULL;
1877 s_new = PyString_AsString(new);
1878 for (i = 0; i < n; i++) {
1879 int c = Py_CHARMASK(*s++);
1880 if (islower(c)) {
1881 if (!previous_is_cased)
1882 c = toupper(c);
1883 previous_is_cased = 1;
1884 } else if (isupper(c)) {
1885 if (previous_is_cased)
1886 c = tolower(c);
1887 previous_is_cased = 1;
1888 } else
1889 previous_is_cased = 0;
1890 *s_new++ = c;
1891 }
1892 return new;
1893}
1894
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001895PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896"S.capitalize() -> string\n\
1897\n\
1898Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001899capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900
1901static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001902string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
1904 char *s = PyString_AS_STRING(self), *s_new;
1905 int i, n = PyString_GET_SIZE(self);
1906 PyObject *new;
1907
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 new = PyString_FromStringAndSize(NULL, n);
1909 if (new == NULL)
1910 return NULL;
1911 s_new = PyString_AsString(new);
1912 if (0 < n) {
1913 int c = Py_CHARMASK(*s++);
1914 if (islower(c))
1915 *s_new = toupper(c);
1916 else
1917 *s_new = c;
1918 s_new++;
1919 }
1920 for (i = 1; i < n; i++) {
1921 int c = Py_CHARMASK(*s++);
1922 if (isupper(c))
1923 *s_new = tolower(c);
1924 else
1925 *s_new = c;
1926 s_new++;
1927 }
1928 return new;
1929}
1930
1931
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001932PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933"S.count(sub[, start[, end]]) -> int\n\
1934\n\
1935Return the number of occurrences of substring sub in string\n\
1936S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001937interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938
1939static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001940string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001942 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943 int len = PyString_GET_SIZE(self), n;
1944 int i = 0, last = INT_MAX;
1945 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001946 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947
Guido van Rossumc6821402000-05-08 14:08:05 +00001948 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1949 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001951
Guido van Rossum4c08d552000-03-10 22:55:18 +00001952 if (PyString_Check(subobj)) {
1953 sub = PyString_AS_STRING(subobj);
1954 n = PyString_GET_SIZE(subobj);
1955 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001956#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001957 else if (PyUnicode_Check(subobj)) {
1958 int count;
1959 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1960 if (count == -1)
1961 return NULL;
1962 else
1963 return PyInt_FromLong((long) count);
1964 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001965#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001966 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1967 return NULL;
1968
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001969 string_adjust_indices(&i, &last, len);
1970
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971 m = last + 1 - n;
1972 if (n == 0)
1973 return PyInt_FromLong((long) (m-i));
1974
1975 r = 0;
1976 while (i < m) {
1977 if (!memcmp(s+i, sub, n)) {
1978 r++;
1979 i += n;
1980 } else {
1981 i++;
1982 }
1983 }
1984 return PyInt_FromLong((long) r);
1985}
1986
1987
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001988PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989"S.swapcase() -> string\n\
1990\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001991Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001992converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993
1994static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001995string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996{
1997 char *s = PyString_AS_STRING(self), *s_new;
1998 int i, n = PyString_GET_SIZE(self);
1999 PyObject *new;
2000
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001 new = PyString_FromStringAndSize(NULL, n);
2002 if (new == NULL)
2003 return NULL;
2004 s_new = PyString_AsString(new);
2005 for (i = 0; i < n; i++) {
2006 int c = Py_CHARMASK(*s++);
2007 if (islower(c)) {
2008 *s_new = toupper(c);
2009 }
2010 else if (isupper(c)) {
2011 *s_new = tolower(c);
2012 }
2013 else
2014 *s_new = c;
2015 s_new++;
2016 }
2017 return new;
2018}
2019
2020
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002021PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022"S.translate(table [,deletechars]) -> string\n\
2023\n\
2024Return a copy of the string S, where all characters occurring\n\
2025in the optional argument deletechars are removed, and the\n\
2026remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002027translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028
2029static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002030string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002032 register char *input, *output;
2033 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034 register int i, c, changed = 0;
2035 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002036 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037 int inlen, tablen, dellen = 0;
2038 PyObject *result;
2039 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002040 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041
Guido van Rossum4c08d552000-03-10 22:55:18 +00002042 if (!PyArg_ParseTuple(args, "O|O:translate",
2043 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002045
2046 if (PyString_Check(tableobj)) {
2047 table1 = PyString_AS_STRING(tableobj);
2048 tablen = PyString_GET_SIZE(tableobj);
2049 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002050#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002051 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002052 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002053 parameter; instead a mapping to None will cause characters
2054 to be deleted. */
2055 if (delobj != NULL) {
2056 PyErr_SetString(PyExc_TypeError,
2057 "deletions are implemented differently for unicode");
2058 return NULL;
2059 }
2060 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2061 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002062#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065
2066 if (delobj != NULL) {
2067 if (PyString_Check(delobj)) {
2068 del_table = PyString_AS_STRING(delobj);
2069 dellen = PyString_GET_SIZE(delobj);
2070 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002071#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002072 else if (PyUnicode_Check(delobj)) {
2073 PyErr_SetString(PyExc_TypeError,
2074 "deletions are implemented differently for unicode");
2075 return NULL;
2076 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002077#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2079 return NULL;
2080
2081 if (tablen != 256) {
2082 PyErr_SetString(PyExc_ValueError,
2083 "translation table must be 256 characters long");
2084 return NULL;
2085 }
2086 }
2087 else {
2088 del_table = NULL;
2089 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090 }
2091
2092 table = table1;
2093 inlen = PyString_Size(input_obj);
2094 result = PyString_FromStringAndSize((char *)NULL, inlen);
2095 if (result == NULL)
2096 return NULL;
2097 output_start = output = PyString_AsString(result);
2098 input = PyString_AsString(input_obj);
2099
2100 if (dellen == 0) {
2101 /* If no deletions are required, use faster code */
2102 for (i = inlen; --i >= 0; ) {
2103 c = Py_CHARMASK(*input++);
2104 if (Py_CHARMASK((*output++ = table[c])) != c)
2105 changed = 1;
2106 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002107 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108 return result;
2109 Py_DECREF(result);
2110 Py_INCREF(input_obj);
2111 return input_obj;
2112 }
2113
2114 for (i = 0; i < 256; i++)
2115 trans_table[i] = Py_CHARMASK(table[i]);
2116
2117 for (i = 0; i < dellen; i++)
2118 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2119
2120 for (i = inlen; --i >= 0; ) {
2121 c = Py_CHARMASK(*input++);
2122 if (trans_table[c] != -1)
2123 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2124 continue;
2125 changed = 1;
2126 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002127 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 Py_DECREF(result);
2129 Py_INCREF(input_obj);
2130 return input_obj;
2131 }
2132 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002133 if (inlen > 0)
2134 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135 return result;
2136}
2137
2138
2139/* What follows is used for implementing replace(). Perry Stoll. */
2140
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  if there is none.  When PAT is longer than MEM the scan has no
  candidate positions and -1 is returned.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* last index at which PAT could still fit inside MEM */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
2166
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM: after each hit
  the search resumes right behind the matched bytes.
    mem=1111  and pat=11 gives 2.
    mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        const int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* drop everything up to and including this match */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
2190
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  COUNT limits the number of replacements; a
   negative COUNT means no limit.  An empty PAT matches before every
   byte of STR and once more at its end.

   If STR is empty, if PAT and SUB are both empty, if PAT is longer
   than STR, or if there is nothing to replace, the original string is
   returned.  Otherwise a new buffer is allocated here with
   PyMem_MALLOC and returned; it is not NUL-terminated unless it is
   empty, so the caller must rely on out_len.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failed, or the result
       length would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Only a growing replacement can overflow.  Refuse a result whose
       length cannot be represented instead of computing a wrapped
       (undefined) size and allocating a too-small buffer. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        if (pat_len > 0) {
            for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                if (offset == -1)
                    break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset);
                str += offset + pat_len;
                len -= offset + pat_len;

                /* copy substitute into the output string */
                new_s += offset;
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
            }
            /* copy any remaining values into output string */
            if (len > 0)
                memcpy(new_s, str, len);
        }
        else {
            /* empty pattern: emit SUB, then one input byte, repeatedly;
               after the last SUB copy the untouched tail in one go */
            for (;;++str, --len) {
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
                if (--nfound <= 0) {
                    memcpy(new_s, str, len);
                    break;
                }
                *new_s++ = *str;
            }
        }
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2288
2289
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002290PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291"S.replace (old, new[, maxsplit]) -> string\n\
2292\n\
2293Return a copy of string S with all occurrences of substring\n\
2294old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002295given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296
2297static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002298string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300 const char *str = PyString_AS_STRING(self), *sub, *repl;
2301 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002302 const int len = PyString_GET_SIZE(self);
2303 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 if (!PyArg_ParseTuple(args, "OO|i:replace",
2309 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311
2312 if (PyString_Check(subobj)) {
2313 sub = PyString_AS_STRING(subobj);
2314 sub_len = PyString_GET_SIZE(subobj);
2315 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002316#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002318 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002319 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002320#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002321 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2322 return NULL;
2323
2324 if (PyString_Check(replobj)) {
2325 repl = PyString_AS_STRING(replobj);
2326 repl_len = PyString_GET_SIZE(replobj);
2327 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002328#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002329 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002330 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002332#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2334 return NULL;
2335
Guido van Rossum4c08d552000-03-10 22:55:18 +00002336 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 if (new_s == NULL) {
2338 PyErr_NoMemory();
2339 return NULL;
2340 }
2341 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002342 if (PyString_CheckExact(self)) {
2343 /* we're returning another reference to self */
2344 new = (PyObject*)self;
2345 Py_INCREF(new);
2346 }
2347 else {
2348 new = PyString_FromStringAndSize(str, len);
2349 if (new == NULL)
2350 return NULL;
2351 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352 }
2353 else {
2354 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002355 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356 }
2357 return new;
2358}
2359
2360
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002361PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002362"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002364Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002366comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367
2368static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002369string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374 int plen;
2375 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002376 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002377 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378
Guido van Rossumc6821402000-05-08 14:08:05 +00002379 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2380 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381 return NULL;
2382 if (PyString_Check(subobj)) {
2383 prefix = PyString_AS_STRING(subobj);
2384 plen = PyString_GET_SIZE(subobj);
2385 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002386#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002387 else if (PyUnicode_Check(subobj)) {
2388 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002389 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002390 subobj, start, end, -1);
2391 if (rc == -1)
2392 return NULL;
2393 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002394 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002395 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002396#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398 return NULL;
2399
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002400 string_adjust_indices(&start, &end, len);
2401
2402 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002403 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002405 if (end-start >= plen)
2406 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2407 else
2408 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409}
2410
2411
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002412PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002413"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002415Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002417comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418
2419static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002420string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002422 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002424 const char* suffix;
2425 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002427 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429
Guido van Rossumc6821402000-05-08 14:08:05 +00002430 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2431 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432 return NULL;
2433 if (PyString_Check(subobj)) {
2434 suffix = PyString_AS_STRING(subobj);
2435 slen = PyString_GET_SIZE(subobj);
2436 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002437#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002438 else if (PyUnicode_Check(subobj)) {
2439 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002440 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002441 subobj, start, end, +1);
2442 if (rc == -1)
2443 return NULL;
2444 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002445 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002446 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002447#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449 return NULL;
2450
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002451 string_adjust_indices(&start, &end, len);
2452
2453 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002454 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002456 if (end-slen > start)
2457 start = end - slen;
2458 if (end-start >= slen)
2459 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2460 else
2461 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462}
2463
2464
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002465PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002466"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002467\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002468Encodes S using the codec registered for encoding. encoding defaults\n\
2469to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002470handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002471a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2472'xmlcharrefreplace' as well as any other name registered with\n\
2473codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002474
2475static PyObject *
2476string_encode(PyStringObject *self, PyObject *args)
2477{
2478 char *encoding = NULL;
2479 char *errors = NULL;
2480 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2481 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002482 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2483}
2484
2485
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002486PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002487"S.decode([encoding[,errors]]) -> object\n\
2488\n\
2489Decodes S using the codec registered for encoding. encoding defaults\n\
2490to the default encoding. errors may be given to set a different error\n\
2491handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002492a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2493as well as any other name registerd with codecs.register_error that is\n\
2494able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002495
2496static PyObject *
2497string_decode(PyStringObject *self, PyObject *args)
2498{
2499 char *encoding = NULL;
2500 char *errors = NULL;
2501 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2502 return NULL;
2503 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002504}
2505
2506
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002507PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002508"S.expandtabs([tabsize]) -> string\n\
2509\n\
2510Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002511If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002512
2513static PyObject*
2514string_expandtabs(PyStringObject *self, PyObject *args)
2515{
2516 const char *e, *p;
2517 char *q;
2518 int i, j;
2519 PyObject *u;
2520 int tabsize = 8;
2521
2522 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2523 return NULL;
2524
Thomas Wouters7e474022000-07-16 12:04:32 +00002525 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 i = j = 0;
2527 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2528 for (p = PyString_AS_STRING(self); p < e; p++)
2529 if (*p == '\t') {
2530 if (tabsize > 0)
2531 j += tabsize - (j % tabsize);
2532 }
2533 else {
2534 j++;
2535 if (*p == '\n' || *p == '\r') {
2536 i += j;
2537 j = 0;
2538 }
2539 }
2540
2541 /* Second pass: create output string and fill it */
2542 u = PyString_FromStringAndSize(NULL, i + j);
2543 if (!u)
2544 return NULL;
2545
2546 j = 0;
2547 q = PyString_AS_STRING(u);
2548
2549 for (p = PyString_AS_STRING(self); p < e; p++)
2550 if (*p == '\t') {
2551 if (tabsize > 0) {
2552 i = tabsize - (j % tabsize);
2553 j += i;
2554 while (i--)
2555 *q++ = ' ';
2556 }
2557 }
2558 else {
2559 j++;
2560 *q++ = *p;
2561 if (*p == '\n' || *p == '\r')
2562 j = 0;
2563 }
2564
2565 return u;
2566}
2567
Tim Peters8fa5dd02001-09-12 02:18:30 +00002568static PyObject *
2569pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002570{
2571 PyObject *u;
2572
2573 if (left < 0)
2574 left = 0;
2575 if (right < 0)
2576 right = 0;
2577
Tim Peters8fa5dd02001-09-12 02:18:30 +00002578 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002579 Py_INCREF(self);
2580 return (PyObject *)self;
2581 }
2582
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002583 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584 left + PyString_GET_SIZE(self) + right);
2585 if (u) {
2586 if (left)
2587 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002588 memcpy(PyString_AS_STRING(u) + left,
2589 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 PyString_GET_SIZE(self));
2591 if (right)
2592 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2593 fill, right);
2594 }
2595
2596 return u;
2597}
2598
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002599PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002600"S.ljust(width) -> string\n"
2601"\n"
2602"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002603"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604
2605static PyObject *
2606string_ljust(PyStringObject *self, PyObject *args)
2607{
2608 int width;
2609 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2610 return NULL;
2611
Tim Peters8fa5dd02001-09-12 02:18:30 +00002612 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002613 Py_INCREF(self);
2614 return (PyObject*) self;
2615 }
2616
2617 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2618}
2619
2620
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002621PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002622"S.rjust(width) -> string\n"
2623"\n"
2624"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002625"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002626
2627static PyObject *
2628string_rjust(PyStringObject *self, PyObject *args)
2629{
2630 int width;
2631 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2632 return NULL;
2633
Tim Peters8fa5dd02001-09-12 02:18:30 +00002634 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 Py_INCREF(self);
2636 return (PyObject*) self;
2637 }
2638
2639 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2640}
2641
2642
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002643PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002644"S.center(width) -> string\n"
2645"\n"
2646"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002647"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002648
2649static PyObject *
2650string_center(PyStringObject *self, PyObject *args)
2651{
2652 int marg, left;
2653 int width;
2654
2655 if (!PyArg_ParseTuple(args, "i:center", &width))
2656 return NULL;
2657
Tim Peters8fa5dd02001-09-12 02:18:30 +00002658 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002659 Py_INCREF(self);
2660 return (PyObject*) self;
2661 }
2662
2663 marg = width - PyString_GET_SIZE(self);
2664 left = marg / 2 + (marg & width & 1);
2665
2666 return pad(self, left, marg - left, ' ');
2667}
2668
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002669PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002670"S.zfill(width) -> string\n"
2671"\n"
2672"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002673"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002674
2675static PyObject *
2676string_zfill(PyStringObject *self, PyObject *args)
2677{
2678 int fill;
2679 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002680 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002681
2682 int width;
2683 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2684 return NULL;
2685
2686 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002687 if (PyString_CheckExact(self)) {
2688 Py_INCREF(self);
2689 return (PyObject*) self;
2690 }
2691 else
2692 return PyString_FromStringAndSize(
2693 PyString_AS_STRING(self),
2694 PyString_GET_SIZE(self)
2695 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002696 }
2697
2698 fill = width - PyString_GET_SIZE(self);
2699
2700 s = pad(self, fill, 0, '0');
2701
2702 if (s == NULL)
2703 return NULL;
2704
2705 p = PyString_AS_STRING(s);
2706 if (p[fill] == '+' || p[fill] == '-') {
2707 /* move sign to beginning of string */
2708 p[0] = p[fill];
2709 p[fill] = '0';
2710 }
2711
2712 return (PyObject*) s;
2713}
2714
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002715PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002716"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002717"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002718"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002719"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002720
2721static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002722string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002723{
Fred Drakeba096332000-07-09 07:04:36 +00002724 register const unsigned char *p
2725 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002726 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002727
Guido van Rossum4c08d552000-03-10 22:55:18 +00002728 /* Shortcut for single character strings */
2729 if (PyString_GET_SIZE(self) == 1 &&
2730 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002731 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002732
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002733 /* Special case for empty strings */
2734 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002735 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002736
Guido van Rossum4c08d552000-03-10 22:55:18 +00002737 e = p + PyString_GET_SIZE(self);
2738 for (; p < e; p++) {
2739 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002740 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002741 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002742 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002743}
2744
2745
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002746PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002747"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002748\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002749Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002750and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002751
2752static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002753string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002754{
Fred Drakeba096332000-07-09 07:04:36 +00002755 register const unsigned char *p
2756 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002757 register const unsigned char *e;
2758
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002759 /* Shortcut for single character strings */
2760 if (PyString_GET_SIZE(self) == 1 &&
2761 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002762 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002763
2764 /* Special case for empty strings */
2765 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002766 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002767
2768 e = p + PyString_GET_SIZE(self);
2769 for (; p < e; p++) {
2770 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002771 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002772 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002773 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002774}
2775
2776
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002777PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002778"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002779\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002780Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002781and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002782
2783static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002784string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002785{
Fred Drakeba096332000-07-09 07:04:36 +00002786 register const unsigned char *p
2787 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002788 register const unsigned char *e;
2789
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002790 /* Shortcut for single character strings */
2791 if (PyString_GET_SIZE(self) == 1 &&
2792 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002793 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002794
2795 /* Special case for empty strings */
2796 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002797 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002798
2799 e = p + PyString_GET_SIZE(self);
2800 for (; p < e; p++) {
2801 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002802 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002803 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002804 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002805}
2806
2807
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002808PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002809"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002810\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002811Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002812False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002813
2814static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002815string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002816{
Fred Drakeba096332000-07-09 07:04:36 +00002817 register const unsigned char *p
2818 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002819 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002820
Guido van Rossum4c08d552000-03-10 22:55:18 +00002821 /* Shortcut for single character strings */
2822 if (PyString_GET_SIZE(self) == 1 &&
2823 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002824 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002825
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002826 /* Special case for empty strings */
2827 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002828 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002829
Guido van Rossum4c08d552000-03-10 22:55:18 +00002830 e = p + PyString_GET_SIZE(self);
2831 for (; p < e; p++) {
2832 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002833 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002834 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002835 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002836}
2837
2838
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002839PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002840"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002841\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002842Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002843at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002844
2845static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002846string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002847{
Fred Drakeba096332000-07-09 07:04:36 +00002848 register const unsigned char *p
2849 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002850 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002851 int cased;
2852
Guido van Rossum4c08d552000-03-10 22:55:18 +00002853 /* Shortcut for single character strings */
2854 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002855 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002856
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002857 /* Special case for empty strings */
2858 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002859 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002860
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861 e = p + PyString_GET_SIZE(self);
2862 cased = 0;
2863 for (; p < e; p++) {
2864 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002865 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002866 else if (!cased && islower(*p))
2867 cased = 1;
2868 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002869 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002870}
2871
2872
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002873PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002874"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002875\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002876Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002877at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002878
2879static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002880string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881{
Fred Drakeba096332000-07-09 07:04:36 +00002882 register const unsigned char *p
2883 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002884 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885 int cased;
2886
Guido van Rossum4c08d552000-03-10 22:55:18 +00002887 /* Shortcut for single character strings */
2888 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002889 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002890
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002891 /* Special case for empty strings */
2892 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002893 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002894
Guido van Rossum4c08d552000-03-10 22:55:18 +00002895 e = p + PyString_GET_SIZE(self);
2896 cased = 0;
2897 for (; p < e; p++) {
2898 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002899 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900 else if (!cased && isupper(*p))
2901 cased = 1;
2902 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002903 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002904}
2905
2906
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002907PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002908"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002909\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002910Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002911may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002912ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002913
2914static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002915string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002916{
Fred Drakeba096332000-07-09 07:04:36 +00002917 register const unsigned char *p
2918 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002919 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002920 int cased, previous_is_cased;
2921
Guido van Rossum4c08d552000-03-10 22:55:18 +00002922 /* Shortcut for single character strings */
2923 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002924 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002925
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002926 /* Special case for empty strings */
2927 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002928 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002929
Guido van Rossum4c08d552000-03-10 22:55:18 +00002930 e = p + PyString_GET_SIZE(self);
2931 cased = 0;
2932 previous_is_cased = 0;
2933 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002934 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002935
2936 if (isupper(ch)) {
2937 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002938 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002939 previous_is_cased = 1;
2940 cased = 1;
2941 }
2942 else if (islower(ch)) {
2943 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002944 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002945 previous_is_cased = 1;
2946 cased = 1;
2947 }
2948 else
2949 previous_is_cased = 0;
2950 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002951 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002952}
2953
2954
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002955PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002956"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002957\n\
2958Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002959Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002960is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002961
2962#define SPLIT_APPEND(data, left, right) \
2963 str = PyString_FromStringAndSize(data + left, right - left); \
2964 if (!str) \
2965 goto onError; \
2966 if (PyList_Append(list, str)) { \
2967 Py_DECREF(str); \
2968 goto onError; \
2969 } \
2970 else \
2971 Py_DECREF(str);
2972
2973static PyObject*
2974string_splitlines(PyStringObject *self, PyObject *args)
2975{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002976 register int i;
2977 register int j;
2978 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002979 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002980 PyObject *list;
2981 PyObject *str;
2982 char *data;
2983
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002984 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002985 return NULL;
2986
2987 data = PyString_AS_STRING(self);
2988 len = PyString_GET_SIZE(self);
2989
Guido van Rossum4c08d552000-03-10 22:55:18 +00002990 list = PyList_New(0);
2991 if (!list)
2992 goto onError;
2993
2994 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002995 int eol;
2996
Guido van Rossum4c08d552000-03-10 22:55:18 +00002997 /* Find a line and append it */
2998 while (i < len && data[i] != '\n' && data[i] != '\r')
2999 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003000
3001 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003002 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003003 if (i < len) {
3004 if (data[i] == '\r' && i + 1 < len &&
3005 data[i+1] == '\n')
3006 i += 2;
3007 else
3008 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003009 if (keepends)
3010 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003011 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003012 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003013 j = i;
3014 }
3015 if (j < len) {
3016 SPLIT_APPEND(data, j, len);
3017 }
3018
3019 return list;
3020
3021 onError:
3022 Py_DECREF(list);
3023 return NULL;
3024}
3025
3026#undef SPLIT_APPEND
3027
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003028
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003029static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003030string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003031 /* Counterparts of the obsolete stropmodule functions; except
3032 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003033 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3034 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3035 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3036 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003037 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3038 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3039 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3040 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3041 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3042 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3043 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003044 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3045 capitalize__doc__},
3046 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3047 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3048 endswith__doc__},
3049 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3050 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3051 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3052 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3053 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3054 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3055 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3056 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3057 startswith__doc__},
3058 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3059 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3060 swapcase__doc__},
3061 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3062 translate__doc__},
3063 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3064 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3065 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3066 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3067 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3068 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3069 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3070 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3071 expandtabs__doc__},
3072 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3073 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003074 {NULL, NULL} /* sentinel */
3075};
3076
Jeremy Hylton938ace62002-07-17 16:30:39 +00003077static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003078str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3079
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003080static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003081string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003082{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003083 PyObject *x = NULL;
3084 static char *kwlist[] = {"object", 0};
3085
Guido van Rossumae960af2001-08-30 03:11:59 +00003086 if (type != &PyString_Type)
3087 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003088 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3089 return NULL;
3090 if (x == NULL)
3091 return PyString_FromString("");
3092 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003093}
3094
Guido van Rossumae960af2001-08-30 03:11:59 +00003095static PyObject *
3096str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3097{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003098 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003099 int n;
3100
3101 assert(PyType_IsSubtype(type, &PyString_Type));
3102 tmp = string_new(&PyString_Type, args, kwds);
3103 if (tmp == NULL)
3104 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003105 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003106 n = PyString_GET_SIZE(tmp);
3107 pnew = type->tp_alloc(type, n);
3108 if (pnew != NULL) {
3109 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003110 ((PyStringObject *)pnew)->ob_shash =
3111 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003112 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003113 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003114 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003115 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003116}
3117
Guido van Rossumcacfc072002-05-24 19:01:59 +00003118static PyObject *
3119basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3120{
3121 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003122 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003123 return NULL;
3124}
3125
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003126PyDoc_STRVAR(basestring_doc,
3127"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003128
3129PyTypeObject PyBaseString_Type = {
3130 PyObject_HEAD_INIT(&PyType_Type)
3131 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003132 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003133 0,
3134 0,
3135 0, /* tp_dealloc */
3136 0, /* tp_print */
3137 0, /* tp_getattr */
3138 0, /* tp_setattr */
3139 0, /* tp_compare */
3140 0, /* tp_repr */
3141 0, /* tp_as_number */
3142 0, /* tp_as_sequence */
3143 0, /* tp_as_mapping */
3144 0, /* tp_hash */
3145 0, /* tp_call */
3146 0, /* tp_str */
3147 0, /* tp_getattro */
3148 0, /* tp_setattro */
3149 0, /* tp_as_buffer */
3150 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3151 basestring_doc, /* tp_doc */
3152 0, /* tp_traverse */
3153 0, /* tp_clear */
3154 0, /* tp_richcompare */
3155 0, /* tp_weaklistoffset */
3156 0, /* tp_iter */
3157 0, /* tp_iternext */
3158 0, /* tp_methods */
3159 0, /* tp_members */
3160 0, /* tp_getset */
3161 &PyBaseObject_Type, /* tp_base */
3162 0, /* tp_dict */
3163 0, /* tp_descr_get */
3164 0, /* tp_descr_set */
3165 0, /* tp_dictoffset */
3166 0, /* tp_init */
3167 0, /* tp_alloc */
3168 basestring_new, /* tp_new */
3169 0, /* tp_free */
3170};
3171
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003172PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003173"str(object) -> string\n\
3174\n\
3175Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003176If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003177
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003178PyTypeObject PyString_Type = {
3179 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003180 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003181 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003182 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003183 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003184 (destructor)string_dealloc, /* tp_dealloc */
3185 (printfunc)string_print, /* tp_print */
3186 0, /* tp_getattr */
3187 0, /* tp_setattr */
3188 0, /* tp_compare */
3189 (reprfunc)string_repr, /* tp_repr */
3190 0, /* tp_as_number */
3191 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003192 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003193 (hashfunc)string_hash, /* tp_hash */
3194 0, /* tp_call */
3195 (reprfunc)string_str, /* tp_str */
3196 PyObject_GenericGetAttr, /* tp_getattro */
3197 0, /* tp_setattro */
3198 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00003199 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003200 string_doc, /* tp_doc */
3201 0, /* tp_traverse */
3202 0, /* tp_clear */
3203 (richcmpfunc)string_richcompare, /* tp_richcompare */
3204 0, /* tp_weaklistoffset */
3205 0, /* tp_iter */
3206 0, /* tp_iternext */
3207 string_methods, /* tp_methods */
3208 0, /* tp_members */
3209 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003210 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003211 0, /* tp_dict */
3212 0, /* tp_descr_get */
3213 0, /* tp_descr_set */
3214 0, /* tp_dictoffset */
3215 0, /* tp_init */
3216 0, /* tp_alloc */
3217 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003218 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003219};
3220
3221void
Fred Drakeba096332000-07-09 07:04:36 +00003222PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003223{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003224 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003225 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003226 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003227 if (w == NULL || !PyString_Check(*pv)) {
3228 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003229 *pv = NULL;
3230 return;
3231 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003232 v = string_concat((PyStringObject *) *pv, w);
3233 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003234 *pv = v;
3235}
3236
Guido van Rossum013142a1994-08-30 08:19:36 +00003237void
Fred Drakeba096332000-07-09 07:04:36 +00003238PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003239{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003240 PyString_Concat(pv, w);
3241 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003242}
3243
3244
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003245/* The following function breaks the notion that strings are immutable:
3246 it changes the size of a string. We get away with this only if there
3247 is only one module referencing the object. You can also think of it
3248 as creating a new string object and destroying the old one, only
3249 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003250 already be known to some other part of the code...
3251 Note that if there's not enough memory to resize the string, the original
3252 string object at *pv is deallocated, *pv is set to NULL, an "out of
3253 memory" exception is set, and -1 is returned. Else (on success) 0 is
3254 returned, and the value in *pv may or may not be the same as on input.
3255 As always, an extra byte is allocated for a trailing \0 byte (newsize
3256 does *not* include that), and a trailing \0 byte is stored.
3257*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003258
3259int
Fred Drakeba096332000-07-09 07:04:36 +00003260_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003261{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003262 register PyObject *v;
3263 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003264 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003265 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003266 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003267 Py_DECREF(v);
3268 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003269 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003270 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003271 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003272 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003273 _Py_ForgetReference(v);
3274 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003275 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003276 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003277 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003278 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003279 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003280 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003281 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003282 _Py_NewReference(*pv);
3283 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003284 sv->ob_size = newsize;
3285 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003286 return 0;
3287}
Guido van Rossume5372401993-03-16 12:15:04 +00003288
3289/* Helpers for formatstring */
3290
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003291static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003292getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003293{
3294 int argidx = *p_argidx;
3295 if (argidx < arglen) {
3296 (*p_argidx)++;
3297 if (arglen < 0)
3298 return args;
3299 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003300 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003301 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003302 PyErr_SetString(PyExc_TypeError,
3303 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003304 return NULL;
3305}
3306
/* Format flags: one bit per flag character that may follow '%' in a
   format specifier, so that several can be OR-ed into one int. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
3319
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003320static int
Fred Drakeba096332000-07-09 07:04:36 +00003321formatfloat(char *buf, size_t buflen, int flags,
3322 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003323{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003324 /* fmt = '%#.' + `prec` + `type`
3325 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003326 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003327 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003328 x = PyFloat_AsDouble(v);
3329 if (x == -1.0 && PyErr_Occurred()) {
3330 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003331 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003332 }
Guido van Rossume5372401993-03-16 12:15:04 +00003333 if (prec < 0)
3334 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003335 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3336 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003337 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3338 (flags&F_ALT) ? "#" : "",
3339 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003340 /* worst case length calc to ensure no buffer overrun:
3341 fmt = %#.<prec>g
3342 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003343 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003344 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3345 If prec=0 the effective precision is 1 (the leading digit is
3346 always given), therefore increase by one to 10+prec. */
3347 if (buflen <= (size_t)10 + (size_t)prec) {
3348 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003349 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003350 return -1;
3351 }
Tim Peters885d4572001-11-28 20:27:42 +00003352 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003353 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003354}
3355
Tim Peters38fd5b62000-09-21 05:43:11 +00003356/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3357 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3358 * Python's regular ints.
3359 * Return value: a new PyString*, or NULL if error.
3360 * . *pbuf is set to point into it,
3361 * *plen set to the # of chars following that.
3362 * Caller must decref it when done using pbuf.
3363 * The string starting at *pbuf is of the form
3364 * "-"? ("0x" | "0X")? digit+
3365 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003366 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003367 * There will be at least prec digits, zero-filled on the left if
3368 * necessary to get that many.
3369 * val object to be converted
3370 * flags bitmask of format flags; only F_ALT is looked at
3371 * prec minimum number of digits; 0-fill on left if needed
3372 * type a character in [duoxX]; u acts the same as d
3373 *
3374 * CAUTION: o, x and X conversions on regular ints can never
3375 * produce a '-' sign, but can for Python's unbounded ints.
3376 */
3377PyObject*
3378_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3379 char **pbuf, int *plen)
3380{
3381 PyObject *result = NULL;
3382 char *buf;
3383 int i;
3384 int sign; /* 1 if '-', else 0 */
3385 int len; /* number of characters */
3386 int numdigits; /* len == numnondigits + numdigits */
3387 int numnondigits = 0;
3388
3389 switch (type) {
3390 case 'd':
3391 case 'u':
3392 result = val->ob_type->tp_str(val);
3393 break;
3394 case 'o':
3395 result = val->ob_type->tp_as_number->nb_oct(val);
3396 break;
3397 case 'x':
3398 case 'X':
3399 numnondigits = 2;
3400 result = val->ob_type->tp_as_number->nb_hex(val);
3401 break;
3402 default:
3403 assert(!"'type' not in [duoxX]");
3404 }
3405 if (!result)
3406 return NULL;
3407
3408 /* To modify the string in-place, there can only be one reference. */
3409 if (result->ob_refcnt != 1) {
3410 PyErr_BadInternalCall();
3411 return NULL;
3412 }
3413 buf = PyString_AsString(result);
3414 len = PyString_Size(result);
3415 if (buf[len-1] == 'L') {
3416 --len;
3417 buf[len] = '\0';
3418 }
3419 sign = buf[0] == '-';
3420 numnondigits += sign;
3421 numdigits = len - numnondigits;
3422 assert(numdigits > 0);
3423
Tim Petersfff53252001-04-12 18:38:48 +00003424 /* Get rid of base marker unless F_ALT */
3425 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003426 /* Need to skip 0x, 0X or 0. */
3427 int skipped = 0;
3428 switch (type) {
3429 case 'o':
3430 assert(buf[sign] == '0');
3431 /* If 0 is only digit, leave it alone. */
3432 if (numdigits > 1) {
3433 skipped = 1;
3434 --numdigits;
3435 }
3436 break;
3437 case 'x':
3438 case 'X':
3439 assert(buf[sign] == '0');
3440 assert(buf[sign + 1] == 'x');
3441 skipped = 2;
3442 numnondigits -= 2;
3443 break;
3444 }
3445 if (skipped) {
3446 buf += skipped;
3447 len -= skipped;
3448 if (sign)
3449 buf[0] = '-';
3450 }
3451 assert(len == numnondigits + numdigits);
3452 assert(numdigits > 0);
3453 }
3454
3455 /* Fill with leading zeroes to meet minimum width. */
3456 if (prec > numdigits) {
3457 PyObject *r1 = PyString_FromStringAndSize(NULL,
3458 numnondigits + prec);
3459 char *b1;
3460 if (!r1) {
3461 Py_DECREF(result);
3462 return NULL;
3463 }
3464 b1 = PyString_AS_STRING(r1);
3465 for (i = 0; i < numnondigits; ++i)
3466 *b1++ = *buf++;
3467 for (i = 0; i < prec - numdigits; i++)
3468 *b1++ = '0';
3469 for (i = 0; i < numdigits; i++)
3470 *b1++ = *buf++;
3471 *b1 = '\0';
3472 Py_DECREF(result);
3473 result = r1;
3474 buf = PyString_AS_STRING(result);
3475 len = numnondigits + prec;
3476 }
3477
3478 /* Fix up case for hex conversions. */
3479 switch (type) {
3480 case 'x':
3481 /* Need to convert all upper case letters to lower case. */
3482 for (i = 0; i < len; i++)
3483 if (buf[i] >= 'A' && buf[i] <= 'F')
3484 buf[i] += 'a'-'A';
3485 break;
3486 case 'X':
3487 /* Need to convert 0x to 0X (and -0x to -0X). */
3488 if (buf[sign + 1] == 'x')
3489 buf[sign + 1] = 'X';
3490 break;
3491 }
3492 *pbuf = buf;
3493 *plen = len;
3494 return result;
3495}
3496
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003497static int
Fred Drakeba096332000-07-09 07:04:36 +00003498formatint(char *buf, size_t buflen, int flags,
3499 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003500{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003501 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003502 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3503 + 1 + 1 = 24 */
3504 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003505 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003506
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003507 x = PyInt_AsLong(v);
3508 if (x == -1 && PyErr_Occurred()) {
3509 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003510 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003511 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003512 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003513 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003514 "%u/%o/%x/%X of negative int will return "
3515 "a signed string in Python 2.4 and up") < 0)
3516 return -1;
3517 }
Guido van Rossume5372401993-03-16 12:15:04 +00003518 if (prec < 0)
3519 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003520
3521 if ((flags & F_ALT) &&
3522 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003523 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003524 * of issues that cause pain:
3525 * - when 0 is being converted, the C standard leaves off
3526 * the '0x' or '0X', which is inconsistent with other
3527 * %#x/%#X conversions and inconsistent with Python's
3528 * hex() function
3529 * - there are platforms that violate the standard and
3530 * convert 0 with the '0x' or '0X'
3531 * (Metrowerks, Compaq Tru64)
3532 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003533 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003534 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003535 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003536 * We can achieve the desired consistency by inserting our
3537 * own '0x' or '0X' prefix, and substituting %x/%X in place
3538 * of %#x/%#X.
3539 *
3540 * Note that this is the same approach as used in
3541 * formatint() in unicodeobject.c
3542 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003543 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003544 type, prec, type);
3545 }
3546 else {
3547 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003548 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003549 prec, type);
3550 }
3551
Tim Peters38fd5b62000-09-21 05:43:11 +00003552 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003553 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3554 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003555 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003556 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003557 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003558 return -1;
3559 }
Tim Peters885d4572001-11-28 20:27:42 +00003560 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003561 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003562}
3563
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003564static int
Fred Drakeba096332000-07-09 07:04:36 +00003565formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003566{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003567 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003568 if (PyString_Check(v)) {
3569 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003570 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003571 }
3572 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003573 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003574 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003575 }
3576 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003577 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003578}
3579
Guido van Rossum013142a1994-08-30 08:19:36 +00003580
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003581/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3582
3583 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3584 chars are formatted. XXX This is a magic number. Each formatting
3585 routine does bounds checking to ensure no overflow, but a better
3586 solution may be to malloc a buffer of appropriate size for each
3587 format. For now, the current solution is sufficient.
3588*/
3589#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003590
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003591PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003592PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003593{
3594 char *fmt, *res;
3595 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003596 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003597 PyObject *result, *orig_args;
3598#ifdef Py_USING_UNICODE
3599 PyObject *v, *w;
3600#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003601 PyObject *dict = NULL;
3602 if (format == NULL || !PyString_Check(format) || args == NULL) {
3603 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003604 return NULL;
3605 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003606 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003607 fmt = PyString_AS_STRING(format);
3608 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003609 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003610 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003611 if (result == NULL)
3612 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003613 res = PyString_AsString(result);
3614 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003615 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003616 argidx = 0;
3617 }
3618 else {
3619 arglen = -1;
3620 argidx = -2;
3621 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003622 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003623 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003624 while (--fmtcnt >= 0) {
3625 if (*fmt != '%') {
3626 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003627 rescnt = fmtcnt + 100;
3628 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003629 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003630 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003631 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003632 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003633 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003634 }
3635 *res++ = *fmt++;
3636 }
3637 else {
3638 /* Got a format specifier */
3639 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003640 int width = -1;
3641 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003642 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003643 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003644 PyObject *v = NULL;
3645 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003646 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003647 int sign;
3648 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003649 char formatbuf[FORMATBUFLEN];
3650 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003651#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003652 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003653 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003654#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003655
Guido van Rossumda9c2711996-12-05 21:58:58 +00003656 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003657 if (*fmt == '(') {
3658 char *keystart;
3659 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003660 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003661 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003662
3663 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003664 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003665 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003666 goto error;
3667 }
3668 ++fmt;
3669 --fmtcnt;
3670 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003671 /* Skip over balanced parentheses */
3672 while (pcount > 0 && --fmtcnt >= 0) {
3673 if (*fmt == ')')
3674 --pcount;
3675 else if (*fmt == '(')
3676 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003677 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003678 }
3679 keylen = fmt - keystart - 1;
3680 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003681 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003682 "incomplete format key");
3683 goto error;
3684 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003685 key = PyString_FromStringAndSize(keystart,
3686 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003687 if (key == NULL)
3688 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003689 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003690 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003691 args_owned = 0;
3692 }
3693 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003694 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003695 if (args == NULL) {
3696 goto error;
3697 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003698 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003699 arglen = -1;
3700 argidx = -2;
3701 }
Guido van Rossume5372401993-03-16 12:15:04 +00003702 while (--fmtcnt >= 0) {
3703 switch (c = *fmt++) {
3704 case '-': flags |= F_LJUST; continue;
3705 case '+': flags |= F_SIGN; continue;
3706 case ' ': flags |= F_BLANK; continue;
3707 case '#': flags |= F_ALT; continue;
3708 case '0': flags |= F_ZERO; continue;
3709 }
3710 break;
3711 }
3712 if (c == '*') {
3713 v = getnextarg(args, arglen, &argidx);
3714 if (v == NULL)
3715 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003716 if (!PyInt_Check(v)) {
3717 PyErr_SetString(PyExc_TypeError,
3718 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003719 goto error;
3720 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003721 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003722 if (width < 0) {
3723 flags |= F_LJUST;
3724 width = -width;
3725 }
Guido van Rossume5372401993-03-16 12:15:04 +00003726 if (--fmtcnt >= 0)
3727 c = *fmt++;
3728 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003729 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003730 width = c - '0';
3731 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003732 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003733 if (!isdigit(c))
3734 break;
3735 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003736 PyErr_SetString(
3737 PyExc_ValueError,
3738 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003739 goto error;
3740 }
3741 width = width*10 + (c - '0');
3742 }
3743 }
3744 if (c == '.') {
3745 prec = 0;
3746 if (--fmtcnt >= 0)
3747 c = *fmt++;
3748 if (c == '*') {
3749 v = getnextarg(args, arglen, &argidx);
3750 if (v == NULL)
3751 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003752 if (!PyInt_Check(v)) {
3753 PyErr_SetString(
3754 PyExc_TypeError,
3755 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003756 goto error;
3757 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003758 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003759 if (prec < 0)
3760 prec = 0;
3761 if (--fmtcnt >= 0)
3762 c = *fmt++;
3763 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003764 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003765 prec = c - '0';
3766 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003767 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003768 if (!isdigit(c))
3769 break;
3770 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003771 PyErr_SetString(
3772 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003773 "prec too big");
3774 goto error;
3775 }
3776 prec = prec*10 + (c - '0');
3777 }
3778 }
3779 } /* prec */
3780 if (fmtcnt >= 0) {
3781 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003782 if (--fmtcnt >= 0)
3783 c = *fmt++;
3784 }
3785 }
3786 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003787 PyErr_SetString(PyExc_ValueError,
3788 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003789 goto error;
3790 }
3791 if (c != '%') {
3792 v = getnextarg(args, arglen, &argidx);
3793 if (v == NULL)
3794 goto error;
3795 }
3796 sign = 0;
3797 fill = ' ';
3798 switch (c) {
3799 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003800 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003801 len = 1;
3802 break;
3803 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003804 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003805#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003806 if (PyUnicode_Check(v)) {
3807 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003808 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003809 goto unicode;
3810 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003811#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003812 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003813 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003814 else
3815 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003816 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003817 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003818 if (!PyString_Check(temp)) {
3819 PyErr_SetString(PyExc_TypeError,
3820 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003821 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003822 goto error;
3823 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003824 pbuf = PyString_AS_STRING(temp);
3825 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003826 if (prec >= 0 && len > prec)
3827 len = prec;
3828 break;
3829 case 'i':
3830 case 'd':
3831 case 'u':
3832 case 'o':
3833 case 'x':
3834 case 'X':
3835 if (c == 'i')
3836 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003837 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003838 temp = _PyString_FormatLong(v, flags,
3839 prec, c, &pbuf, &len);
3840 if (!temp)
3841 goto error;
3842 /* unbounded ints can always produce
3843 a sign character! */
3844 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003845 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003846 else {
3847 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003848 len = formatint(pbuf,
3849 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003850 flags, prec, c, v);
3851 if (len < 0)
3852 goto error;
3853 /* only d conversion is signed */
3854 sign = c == 'd';
3855 }
3856 if (flags & F_ZERO)
3857 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003858 break;
3859 case 'e':
3860 case 'E':
3861 case 'f':
3862 case 'g':
3863 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003864 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003865 len = formatfloat(pbuf, sizeof(formatbuf),
3866 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003867 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003868 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003869 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003870 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003871 fill = '0';
3872 break;
3873 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003874 pbuf = formatbuf;
3875 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003876 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003877 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003878 break;
3879 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003880 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003881 "unsupported format character '%c' (0x%x) "
3882 "at index %i",
3883 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003884 goto error;
3885 }
3886 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003887 if (*pbuf == '-' || *pbuf == '+') {
3888 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003889 len--;
3890 }
3891 else if (flags & F_SIGN)
3892 sign = '+';
3893 else if (flags & F_BLANK)
3894 sign = ' ';
3895 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003896 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003897 }
3898 if (width < len)
3899 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003900 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003901 reslen -= rescnt;
3902 rescnt = width + fmtcnt + 100;
3903 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003904 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003905 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003906 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003907 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003908 }
3909 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003910 if (fill != ' ')
3911 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003912 rescnt--;
3913 if (width > len)
3914 width--;
3915 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003916 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3917 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003918 assert(pbuf[1] == c);
3919 if (fill != ' ') {
3920 *res++ = *pbuf++;
3921 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003922 }
Tim Petersfff53252001-04-12 18:38:48 +00003923 rescnt -= 2;
3924 width -= 2;
3925 if (width < 0)
3926 width = 0;
3927 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003928 }
3929 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003930 do {
3931 --rescnt;
3932 *res++ = fill;
3933 } while (--width > len);
3934 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003935 if (fill == ' ') {
3936 if (sign)
3937 *res++ = sign;
3938 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003939 (c == 'x' || c == 'X')) {
3940 assert(pbuf[0] == '0');
3941 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003942 *res++ = *pbuf++;
3943 *res++ = *pbuf++;
3944 }
3945 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003946 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003947 res += len;
3948 rescnt -= len;
3949 while (--width >= len) {
3950 --rescnt;
3951 *res++ = ' ';
3952 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003953 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003954 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003955 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003956 goto error;
3957 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003958 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003959 } /* '%' */
3960 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003961 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003962 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003963 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003964 goto error;
3965 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003966 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003967 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003968 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003969 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003970 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003971
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003972#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003973 unicode:
3974 if (args_owned) {
3975 Py_DECREF(args);
3976 args_owned = 0;
3977 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003978 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003979 if (PyTuple_Check(orig_args) && argidx > 0) {
3980 PyObject *v;
3981 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3982 v = PyTuple_New(n);
3983 if (v == NULL)
3984 goto error;
3985 while (--n >= 0) {
3986 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3987 Py_INCREF(w);
3988 PyTuple_SET_ITEM(v, n, w);
3989 }
3990 args = v;
3991 } else {
3992 Py_INCREF(orig_args);
3993 args = orig_args;
3994 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003995 args_owned = 1;
3996 /* Take what we have of the result and let the Unicode formatting
3997 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003998 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003999 if (_PyString_Resize(&result, rescnt))
4000 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004001 fmtcnt = PyString_GET_SIZE(format) - \
4002 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004003 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4004 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004005 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004006 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004007 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004008 if (v == NULL)
4009 goto error;
4010 /* Paste what we have (result) to what the Unicode formatting
4011 function returned (v) and return the result (or error) */
4012 w = PyUnicode_Concat(result, v);
4013 Py_DECREF(result);
4014 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004015 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004016 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004017#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004018
Guido van Rossume5372401993-03-16 12:15:04 +00004019 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004020 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004021 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004022 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004023 }
Guido van Rossume5372401993-03-16 12:15:04 +00004024 return NULL;
4025}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004026
Guido van Rossum2a61e741997-01-18 07:55:05 +00004027void
Fred Drakeba096332000-07-09 07:04:36 +00004028PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004029{
4030 register PyStringObject *s = (PyStringObject *)(*p);
4031 PyObject *t;
4032 if (s == NULL || !PyString_Check(s))
4033 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004034 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004035 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004036 if (interned == NULL) {
4037 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004038 if (interned == NULL) {
4039 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004040 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004041 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004042 }
4043 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4044 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004045 Py_DECREF(*p);
4046 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004047 return;
4048 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004049 /* Ensure that only true string objects appear in the intern dict */
4050 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004051 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4052 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004053 if (t == NULL) {
4054 PyErr_Clear();
4055 return;
Tim Peters111f6092001-09-12 07:54:51 +00004056 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004057 } else {
4058 t = (PyObject*) s;
4059 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004060 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004061
4062 if (PyDict_SetItem(interned, t, t) == 0) {
4063 /* The two references in interned are not counted by
4064 refcnt. The string deallocator will take care of this */
4065 ((PyObject *)t)->ob_refcnt-=2;
4066 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4067 Py_DECREF(*p);
4068 *p = t;
4069 return;
4070 }
4071 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004072 PyErr_Clear();
4073}
4074
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004075void
4076PyString_InternImmortal(PyObject **p)
4077{
4078 PyString_InternInPlace(p);
4079 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4080 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4081 Py_INCREF(*p);
4082 }
4083}
4084
Guido van Rossum2a61e741997-01-18 07:55:05 +00004085
4086PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004087PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004088{
4089 PyObject *s = PyString_FromString(cp);
4090 if (s == NULL)
4091 return NULL;
4092 PyString_InternInPlace(&s);
4093 return s;
4094}
4095
Guido van Rossum8cf04761997-08-02 02:57:45 +00004096void
Fred Drakeba096332000-07-09 07:04:36 +00004097PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004098{
4099 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004100 for (i = 0; i < UCHAR_MAX + 1; i++) {
4101 Py_XDECREF(characters[i]);
4102 characters[i] = NULL;
4103 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004104 Py_XDECREF(nullstring);
4105 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004106}
Barry Warsawa903ad982001-02-23 16:40:48 +00004107
Barry Warsawa903ad982001-02-23 16:40:48 +00004108void _Py_ReleaseInternedStrings(void)
4109{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004110 PyObject *keys;
4111 PyStringObject *s;
4112 int i, n;
4113
4114 if (interned == NULL || !PyDict_Check(interned))
4115 return;
4116 keys = PyDict_Keys(interned);
4117 if (keys == NULL || !PyList_Check(keys)) {
4118 PyErr_Clear();
4119 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004120 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004121
4122 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4123 detector, interned strings are not forcibly deallocated; rather, we
4124 give them their stolen references back, and then clear and DECREF
4125 the interned dict. */
4126
4127 fprintf(stderr, "releasing interned strings\n");
4128 n = PyList_GET_SIZE(keys);
4129 for (i = 0; i < n; i++) {
4130 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4131 switch (s->ob_sstate) {
4132 case SSTATE_NOT_INTERNED:
4133 /* XXX Shouldn't happen */
4134 break;
4135 case SSTATE_INTERNED_IMMORTAL:
4136 s->ob_refcnt += 1;
4137 break;
4138 case SSTATE_INTERNED_MORTAL:
4139 s->ob_refcnt += 2;
4140 break;
4141 default:
4142 Py_FatalError("Inconsistent interned string state.");
4143 }
4144 s->ob_sstate = SSTATE_NOT_INTERNED;
4145 }
4146 Py_DECREF(keys);
4147 PyDict_Clear(interned);
4148 Py_DECREF(interned);
4149 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004150}