blob: 9598ffb3cfdeb6278cf6e3c8faaa8bb8fd93cf12 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 For PyString_FromStringAndSize(), the parameter the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000544 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545#ifdef Py_USING_UNICODE
546 if (recode_encoding && (*s & 0x80)) {
547 PyObject *u, *w;
548 char *r;
549 const char* t;
550 int rn;
551 t = s;
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t < end && (*t & 0x80)) t++;
554 u = PyUnicode_DecodeUTF8(s, t - s, errors);
555 if(!u) goto failed;
556
557 /* Recode them in target encoding. */
558 w = PyUnicode_AsEncodedString(
559 u, recode_encoding, errors);
560 Py_DECREF(u);
561 if (!w) goto failed;
562
563 /* Append bytes to output buffer. */
564 r = PyString_AsString(w);
565 rn = PyString_Size(w);
566 memcpy(p, r, rn);
567 p += rn;
568 Py_DECREF(w);
569 s = t;
570 } else {
571 *p++ = *s++;
572 }
573#else
574 *p++ = *s++;
575#endif
576 continue;
577 }
578 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000579 if (s==end) {
580 PyErr_SetString(PyExc_ValueError,
581 "Trailing \\ in string");
582 goto failed;
583 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000584 switch (*s++) {
585 /* XXX This assumes ASCII! */
586 case '\n': break;
587 case '\\': *p++ = '\\'; break;
588 case '\'': *p++ = '\''; break;
589 case '\"': *p++ = '\"'; break;
590 case 'b': *p++ = '\b'; break;
591 case 'f': *p++ = '\014'; break; /* FF */
592 case 't': *p++ = '\t'; break;
593 case 'n': *p++ = '\n'; break;
594 case 'r': *p++ = '\r'; break;
595 case 'v': *p++ = '\013'; break; /* VT */
596 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
599 c = s[-1] - '0';
600 if ('0' <= *s && *s <= '7') {
601 c = (c<<3) + *s++ - '0';
602 if ('0' <= *s && *s <= '7')
603 c = (c<<3) + *s++ - '0';
604 }
605 *p++ = c;
606 break;
607 case 'x':
608 if (isxdigit(Py_CHARMASK(s[0]))
609 && isxdigit(Py_CHARMASK(s[1]))) {
610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
613 if (isdigit(c))
614 x = c - '0';
615 else if (islower(c))
616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x += c - '0';
624 else if (islower(c))
625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000632 PyErr_SetString(PyExc_ValueError,
633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
647#ifndef Py_USING_UNICODE
648 case 'u':
649 case 'U':
650 case 'N':
651 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "Unicode escapes not legal "
654 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657#endif
658 default:
659 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000660 s--;
661 goto non_esc; /* an arbitry number of unescaped
662 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000665 if (p-buf < newlen)
666 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 return v;
668 failed:
669 Py_DECREF(v);
670 return NULL;
671}
672
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000673static int
674string_getsize(register PyObject *op)
675{
676 char *s;
677 int len;
678 if (PyString_AsStringAndSize(op, &s, &len))
679 return -1;
680 return len;
681}
682
683static /*const*/ char *
684string_getbuffer(register PyObject *op)
685{
686 char *s;
687 int len;
688 if (PyString_AsStringAndSize(op, &s, &len))
689 return NULL;
690 return s;
691}
692
Guido van Rossumd7047b31995-01-02 19:07:15 +0000693int
Fred Drakeba096332000-07-09 07:04:36 +0000694PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696 if (!PyString_Check(op))
697 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699}
700
701/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000702PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (!PyString_Check(op))
705 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707}
708
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709int
710PyString_AsStringAndSize(register PyObject *obj,
711 register char **s,
712 register int *len)
713{
714 if (s == NULL) {
715 PyErr_BadInternalCall();
716 return -1;
717 }
718
719 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000720#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (PyUnicode_Check(obj)) {
722 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
723 if (obj == NULL)
724 return -1;
725 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000726 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000727#endif
728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
730 "expected string or Unicode object, "
731 "%.200s found", obj->ob_type->tp_name);
732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
739 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747/* Methods */
748
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000749static int
Fred Drakeba096332000-07-09 07:04:36 +0000750string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
752 int i;
753 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000754 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000755
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000756 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000757 if (! PyString_CheckExact(op)) {
758 int ret;
759 /* A str subclass may have its own __str__ method. */
760 op = (PyStringObject *) PyObject_Str((PyObject *)op);
761 if (op == NULL)
762 return -1;
763 ret = string_print(op, fp, flags);
764 Py_DECREF(op);
765 return ret;
766 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000768#ifdef __VMS
769 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
770#else
771 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
772#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000773 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775
Thomas Wouters7e474022000-07-16 12:04:32 +0000776 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000777 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000778 if (memchr(op->ob_sval, '\'', op->ob_size) &&
779 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 quote = '"';
781
782 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 for (i = 0; i < op->ob_size; i++) {
784 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000789 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000791 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fprintf(fp, "\\r");
793 else if (c < ' ' || c >= 0x7f)
794 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000795 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000796 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000798 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000799 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000802PyObject *
803PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000805 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000806 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
807 PyObject *v;
808 if (newsize > INT_MAX) {
809 PyErr_SetString(PyExc_OverflowError,
810 "string is too large to make repr");
811 }
812 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000814 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 }
816 else {
817 register int i;
818 register char c;
819 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 int quote;
821
Thomas Wouters7e474022000-07-16 12:04:32 +0000822 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000824 if (smartquotes &&
825 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000826 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000827 quote = '"';
828
Tim Peters9161c8b2001-12-03 01:55:38 +0000829 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000832 /* There's at least enough room for a hex escape
833 and a closing quote. */
834 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
839 *p++ = '\\', *p++ = 't';
840 else if (c == '\n')
841 *p++ = '\\', *p++ = 'n';
842 else if (c == '\r')
843 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 else if (c < ' ' || c >= 0x7f) {
845 /* For performance, we don't want to call
846 PyOS_snprintf here (extra layers of
847 function call). */
848 sprintf(p, "\\x%02x", c & 0xff);
849 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000850 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000851 else
852 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000857 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000858 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000859 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000860 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864string_repr(PyObject *op)
865{
866 return PyString_Repr(op, 1);
867}
868
869static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000870string_str(PyObject *s)
871{
Tim Petersc9933152001-10-16 20:18:24 +0000872 assert(PyString_Check(s));
873 if (PyString_CheckExact(s)) {
874 Py_INCREF(s);
875 return s;
876 }
877 else {
878 /* Subtype -- return genuine string with the same value. */
879 PyStringObject *t = (PyStringObject *) s;
880 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
881 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000882}
883
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884static int
Fred Drakeba096332000-07-09 07:04:36 +0000885string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 return a->ob_size;
888}
889
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000891string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892{
893 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 register PyStringObject *op;
895 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000896#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000897 if (PyUnicode_Check(bb))
898 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000899#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000900 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000901 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000902 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903 return NULL;
904 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000905#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000907 if ((a->ob_size == 0 || b->ob_size == 0) &&
908 PyString_CheckExact(a) && PyString_CheckExact(b)) {
909 if (a->ob_size == 0) {
910 Py_INCREF(bb);
911 return bb;
912 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 Py_INCREF(a);
914 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
916 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000917 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000919 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000920 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000922 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000923 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000924 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000925 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
926 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
927 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000929#undef b
930}
931
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000933string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
935 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000936 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000937 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000939 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 if (n < 0)
941 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000942 /* watch out for overflows: the size can overflow int,
943 * and the # of bytes needed can overflow size_t
944 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000946 if (n && size / n != a->ob_size) {
947 PyErr_SetString(PyExc_OverflowError,
948 "repeated string is too long");
949 return NULL;
950 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000951 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
Tim Peters8f422462000-09-09 06:13:41 +0000955 nbytes = size * sizeof(char);
956 if (nbytes / sizeof(char) != (size_t)size ||
957 nbytes + sizeof(PyStringObject) <= nbytes) {
958 PyErr_SetString(PyExc_OverflowError,
959 "repeated string is too long");
960 return NULL;
961 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000963 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000969 op->ob_sval[size] = '\0';
970 if (a->ob_size == 1 && n > 0) {
971 memset(op->ob_sval, a->ob_sval[0] , n);
972 return (PyObject *) op;
973 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000974 i = 0;
975 if (i < size) {
976 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
977 i = (int) a->ob_size;
978 }
979 while (i < size) {
980 j = (i <= size-i) ? i : size-i;
981 memcpy(op->ob_sval+i, op->ob_sval, j);
982 i += j;
983 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985}
986
987/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
988
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000990string_slice(register PyStringObject *a, register int i, register int j)
991 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992{
993 if (i < 0)
994 i = 0;
995 if (j < 0)
996 j = 0; /* Avoid signed/unsigned bug in next line */
997 if (j > a->ob_size)
998 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000999 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1000 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001001 Py_INCREF(a);
1002 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 }
1004 if (j < i)
1005 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007}
1008
Guido van Rossum9284a572000-03-07 15:53:43 +00001009static int
Fred Drakeba096332000-07-09 07:04:36 +00001010string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001011{
Barry Warsaw817918c2002-08-06 16:58:21 +00001012 const char *lhs, *rhs, *end;
1013 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014
1015 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001016#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 if (PyUnicode_Check(el))
1018 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001019#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001020 if (!PyString_Check(el)) {
1021 PyErr_SetString(PyExc_TypeError,
1022 "'in <string>' requires string as left operand");
1023 return -1;
1024 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001025 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001027 rhs = PyString_AS_STRING(el);
1028 lhs = PyString_AS_STRING(a);
1029
1030 /* optimize for a single character */
1031 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001032 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001033
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001034 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001035 while (lhs <= end) {
1036 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001037 return 1;
1038 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001039
Guido van Rossum9284a572000-03-07 15:53:43 +00001040 return 0;
1041}
1042
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001044string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001047 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050 return NULL;
1051 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001052 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001053 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001054 if (v == NULL)
1055 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001056 else {
1057#ifdef COUNT_ALLOCS
1058 one_strings++;
1059#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001060 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001061 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001062 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063}
1064
Martin v. Löwiscd353062001-05-24 16:56:35 +00001065static PyObject*
1066string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001068 int c;
1069 int len_a, len_b;
1070 int min_len;
1071 PyObject *result;
1072
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001073 /* Make sure both arguments are strings. */
1074 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001075 result = Py_NotImplemented;
1076 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001077 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001078 if (a == b) {
1079 switch (op) {
1080 case Py_EQ:case Py_LE:case Py_GE:
1081 result = Py_True;
1082 goto out;
1083 case Py_NE:case Py_LT:case Py_GT:
1084 result = Py_False;
1085 goto out;
1086 }
1087 }
1088 if (op == Py_EQ) {
1089 /* Supporting Py_NE here as well does not save
1090 much time, since Py_NE is rarely used. */
1091 if (a->ob_size == b->ob_size
1092 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001093 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094 a->ob_size) == 0)) {
1095 result = Py_True;
1096 } else {
1097 result = Py_False;
1098 }
1099 goto out;
1100 }
1101 len_a = a->ob_size; len_b = b->ob_size;
1102 min_len = (len_a < len_b) ? len_a : len_b;
1103 if (min_len > 0) {
1104 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1105 if (c==0)
1106 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1107 }else
1108 c = 0;
1109 if (c == 0)
1110 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1111 switch (op) {
1112 case Py_LT: c = c < 0; break;
1113 case Py_LE: c = c <= 0; break;
1114 case Py_EQ: assert(0); break; /* unreachable */
1115 case Py_NE: c = c != 0; break;
1116 case Py_GT: c = c > 0; break;
1117 case Py_GE: c = c >= 0; break;
1118 default:
1119 result = Py_NotImplemented;
1120 goto out;
1121 }
1122 result = c ? Py_True : Py_False;
1123 out:
1124 Py_INCREF(result);
1125 return result;
1126}
1127
1128int
1129_PyString_Eq(PyObject *o1, PyObject *o2)
1130{
1131 PyStringObject *a, *b;
1132 a = (PyStringObject*)o1;
1133 b = (PyStringObject*)o2;
1134 return a->ob_size == b->ob_size
1135 && *a->ob_sval == *b->ob_sval
1136 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001137}
1138
Guido van Rossum9bfef441993-03-29 10:43:31 +00001139static long
Fred Drakeba096332000-07-09 07:04:36 +00001140string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001141{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001142 register int len;
1143 register unsigned char *p;
1144 register long x;
1145
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001146 if (a->ob_shash != -1)
1147 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 len = a->ob_size;
1149 p = (unsigned char *) a->ob_sval;
1150 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001151 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001152 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001153 x ^= a->ob_size;
1154 if (x == -1)
1155 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001156 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001157 return x;
1158}
1159
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001160static PyObject*
1161string_subscript(PyStringObject* self, PyObject* item)
1162{
1163 if (PyInt_Check(item)) {
1164 long i = PyInt_AS_LONG(item);
1165 if (i < 0)
1166 i += PyString_GET_SIZE(self);
1167 return string_item(self,i);
1168 }
1169 else if (PyLong_Check(item)) {
1170 long i = PyLong_AsLong(item);
1171 if (i == -1 && PyErr_Occurred())
1172 return NULL;
1173 if (i < 0)
1174 i += PyString_GET_SIZE(self);
1175 return string_item(self,i);
1176 }
1177 else if (PySlice_Check(item)) {
1178 int start, stop, step, slicelength, cur, i;
1179 char* source_buf;
1180 char* result_buf;
1181 PyObject* result;
1182
1183 if (PySlice_GetIndicesEx((PySliceObject*)item,
1184 PyString_GET_SIZE(self),
1185 &start, &stop, &step, &slicelength) < 0) {
1186 return NULL;
1187 }
1188
1189 if (slicelength <= 0) {
1190 return PyString_FromStringAndSize("", 0);
1191 }
1192 else {
1193 source_buf = PyString_AsString((PyObject*)self);
1194 result_buf = PyMem_Malloc(slicelength);
1195
1196 for (cur = start, i = 0; i < slicelength;
1197 cur += step, i++) {
1198 result_buf[i] = source_buf[cur];
1199 }
1200
1201 result = PyString_FromStringAndSize(result_buf,
1202 slicelength);
1203 PyMem_Free(result_buf);
1204 return result;
1205 }
1206 }
1207 else {
1208 PyErr_SetString(PyExc_TypeError,
1209 "string indices must be integers");
1210 return NULL;
1211 }
1212}
1213
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001214static int
Fred Drakeba096332000-07-09 07:04:36 +00001215string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216{
1217 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001218 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001219 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220 return -1;
1221 }
1222 *ptr = (void *)self->ob_sval;
1223 return self->ob_size;
1224}
1225
1226static int
Fred Drakeba096332000-07-09 07:04:36 +00001227string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228{
Guido van Rossum045e6881997-09-08 18:30:11 +00001229 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001230 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001231 return -1;
1232}
1233
1234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236{
1237 if ( lenp )
1238 *lenp = self->ob_size;
1239 return 1;
1240}
1241
Guido van Rossum1db70701998-10-08 02:18:52 +00001242static int
Fred Drakeba096332000-07-09 07:04:36 +00001243string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001244{
1245 if ( index != 0 ) {
1246 PyErr_SetString(PyExc_SystemError,
1247 "accessing non-existent string segment");
1248 return -1;
1249 }
1250 *ptr = self->ob_sval;
1251 return self->ob_size;
1252}
1253
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001254static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001255 (inquiry)string_length, /*sq_length*/
1256 (binaryfunc)string_concat, /*sq_concat*/
1257 (intargfunc)string_repeat, /*sq_repeat*/
1258 (intargfunc)string_item, /*sq_item*/
1259 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001260 0, /*sq_ass_item*/
1261 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001262 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001263};
1264
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001265static PyMappingMethods string_as_mapping = {
1266 (inquiry)string_length,
1267 (binaryfunc)string_subscript,
1268 0,
1269};
1270
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001271static PyBufferProcs string_as_buffer = {
1272 (getreadbufferproc)string_buffer_getreadbuf,
1273 (getwritebufferproc)string_buffer_getwritebuf,
1274 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001275 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001276};
1277
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278
1279
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for selector i: the format string without its leading
   three characters ("|O:"). */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001288
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289
1290static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001291split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001293 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001294 PyObject* item;
1295 PyObject *list = PyList_New(0);
1296
1297 if (list == NULL)
1298 return NULL;
1299
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 for (i = j = 0; i < len; ) {
1301 while (i < len && isspace(Py_CHARMASK(s[i])))
1302 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001304 while (i < len && !isspace(Py_CHARMASK(s[i])))
1305 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307 if (maxsplit-- <= 0)
1308 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1310 if (item == NULL)
1311 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001312 err = PyList_Append(list, item);
1313 Py_DECREF(item);
1314 if (err < 0)
1315 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316 while (i < len && isspace(Py_CHARMASK(s[i])))
1317 i++;
1318 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319 }
1320 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321 if (j < len) {
1322 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1323 if (item == NULL)
1324 goto finally;
1325 err = PyList_Append(list, item);
1326 Py_DECREF(item);
1327 if (err < 0)
1328 goto finally;
1329 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 return list;
1331 finally:
1332 Py_DECREF(list);
1333 return NULL;
1334}
1335
1336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001337PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338"S.split([sep [,maxsplit]]) -> list of strings\n\
1339\n\
1340Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001342splits are done. If sep is not specified or is None, any\n\
1343whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344
1345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001346string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347{
1348 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 int maxsplit = -1;
1350 const char *s = PyString_AS_STRING(self), *sub;
1351 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 if (maxsplit < 0)
1356 maxsplit = INT_MAX;
1357 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 if (PyString_Check(subobj)) {
1360 sub = PyString_AS_STRING(subobj);
1361 n = PyString_GET_SIZE(subobj);
1362 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001363#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001364 else if (PyUnicode_Check(subobj))
1365 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001366#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1368 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369 if (n == 0) {
1370 PyErr_SetString(PyExc_ValueError, "empty separator");
1371 return NULL;
1372 }
1373
1374 list = PyList_New(0);
1375 if (list == NULL)
1376 return NULL;
1377
1378 i = j = 0;
1379 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001380 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001381 if (maxsplit-- <= 0)
1382 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1384 if (item == NULL)
1385 goto fail;
1386 err = PyList_Append(list, item);
1387 Py_DECREF(item);
1388 if (err < 0)
1389 goto fail;
1390 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 }
1392 else
1393 i++;
1394 }
1395 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1396 if (item == NULL)
1397 goto fail;
1398 err = PyList_Append(list, item);
1399 Py_DECREF(item);
1400 if (err < 0)
1401 goto fail;
1402
1403 return list;
1404
1405 fail:
1406 Py_DECREF(list);
1407 return NULL;
1408}
1409
1410
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001411PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412"S.join(sequence) -> string\n\
1413\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001414Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001415sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416
1417static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001418string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419{
1420 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001421 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 char *p;
1424 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001425 size_t sz = 0;
1426 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001427 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428
Tim Peters19fe14e2001-01-19 03:03:47 +00001429 seq = PySequence_Fast(orig, "");
1430 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001431 if (PyErr_ExceptionMatches(PyExc_TypeError))
1432 PyErr_Format(PyExc_TypeError,
1433 "sequence expected, %.80s found",
1434 orig->ob_type->tp_name);
1435 return NULL;
1436 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001437
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001438 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001439 if (seqlen == 0) {
1440 Py_DECREF(seq);
1441 return PyString_FromString("");
1442 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001444 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001445 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1446 PyErr_Format(PyExc_TypeError,
1447 "sequence item 0: expected string,"
1448 " %.80s found",
1449 item->ob_type->tp_name);
1450 Py_DECREF(seq);
1451 return NULL;
1452 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001453 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001454 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001457
Tim Peters19fe14e2001-01-19 03:03:47 +00001458 /* There are at least two things to join. Do a pre-pass to figure out
1459 * the total amount of space we'll need (sz), see whether any argument
1460 * is absurd, and defer to the Unicode join if appropriate.
1461 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001463 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001464 item = PySequence_Fast_GET_ITEM(seq, i);
1465 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001466#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001467 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001468 /* Defer to Unicode join.
1469 * CAUTION: There's no gurantee that the
1470 * original sequence can be iterated over
1471 * again, so we must pass seq here.
1472 */
1473 PyObject *result;
1474 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001475 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001476 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001478#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001479 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001480 "sequence item %i: expected string,"
1481 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001482 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001483 Py_DECREF(seq);
1484 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001485 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001486 sz += PyString_GET_SIZE(item);
1487 if (i != 0)
1488 sz += seplen;
1489 if (sz < old_sz || sz > INT_MAX) {
1490 PyErr_SetString(PyExc_OverflowError,
1491 "join() is too long for a Python string");
1492 Py_DECREF(seq);
1493 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001495 }
1496
1497 /* Allocate result space. */
1498 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1499 if (res == NULL) {
1500 Py_DECREF(seq);
1501 return NULL;
1502 }
1503
1504 /* Catenate everything. */
1505 p = PyString_AS_STRING(res);
1506 for (i = 0; i < seqlen; ++i) {
1507 size_t n;
1508 item = PySequence_Fast_GET_ITEM(seq, i);
1509 n = PyString_GET_SIZE(item);
1510 memcpy(p, PyString_AS_STRING(item), n);
1511 p += n;
1512 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001513 memcpy(p, sep, seplen);
1514 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001515 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001517
Jeremy Hylton49048292000-07-11 03:28:17 +00001518 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520}
1521
Tim Peters52e155e2001-06-16 05:42:57 +00001522PyObject *
1523_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001524{
Tim Petersa7259592001-06-16 05:11:17 +00001525 assert(sep != NULL && PyString_Check(sep));
1526 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001527 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001528}
1529
/* Normalise a (start, end) index pair in place for a sequence of length
   `len`: an end beyond `len` is clamped to `len`, negative values are
   taken relative to the end, and anything still negative becomes 0.
   `*start` is not clamped from above. */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int value;

    value = *end;
    if (value > len)
        value = len;
    else if (value < 0)
        value += len;
    if (value < 0)
        value = 0;
    *end = value;

    value = *start;
    if (value < 0)
        value += len;
    if (value < 0)
        value = 0;
    *start = value;
}
1544
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545static long
Fred Drakeba096332000-07-09 07:04:36 +00001546string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001548 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001549 int len = PyString_GET_SIZE(self);
1550 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001551 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001553 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001554 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 return -2;
1556 if (PyString_Check(subobj)) {
1557 sub = PyString_AS_STRING(subobj);
1558 n = PyString_GET_SIZE(subobj);
1559 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001560#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001561 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001562 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001563#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565 return -2;
1566
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001567 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 if (dir > 0) {
1570 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001571 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001572 last -= n;
1573 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001574 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 return (long)i;
1576 }
1577 else {
1578 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001579
Guido van Rossum4c08d552000-03-10 22:55:18 +00001580 if (n == 0 && i <= last)
1581 return (long)last;
1582 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001583 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 return (long)j;
1585 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001586
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587 return -1;
1588}
1589
1590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001591PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592"S.find(sub [,start [,end]]) -> int\n\
1593\n\
1594Return the lowest index in S where substring sub is found,\n\
1595such that sub is contained within s[start,end]. Optional\n\
1596arguments start and end are interpreted as in slice notation.\n\
1597\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001598Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599
1600static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001601string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001603 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604 if (result == -2)
1605 return NULL;
1606 return PyInt_FromLong(result);
1607}
1608
1609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001610PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611"S.index(sub [,start [,end]]) -> int\n\
1612\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001613Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614
1615static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001616string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001618 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (result == -2)
1620 return NULL;
1621 if (result == -1) {
1622 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001623 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624 return NULL;
1625 }
1626 return PyInt_FromLong(result);
1627}
1628
1629
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001630PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631"S.rfind(sub [,start [,end]]) -> int\n\
1632\n\
1633Return the highest index in S where substring sub is found,\n\
1634such that sub is contained within s[start,end]. Optional\n\
1635arguments start and end are interpreted as in slice notation.\n\
1636\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001637Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638
1639static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001640string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001642 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643 if (result == -2)
1644 return NULL;
1645 return PyInt_FromLong(result);
1646}
1647
1648
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001649PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650"S.rindex(sub [,start [,end]]) -> int\n\
1651\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001652Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653
1654static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001655string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001657 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 if (result == -2)
1659 return NULL;
1660 if (result == -1) {
1661 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001662 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663 return NULL;
1664 }
1665 return PyInt_FromLong(result);
1666}
1667
1668
1669static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001670do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1671{
1672 char *s = PyString_AS_STRING(self);
1673 int len = PyString_GET_SIZE(self);
1674 char *sep = PyString_AS_STRING(sepobj);
1675 int seplen = PyString_GET_SIZE(sepobj);
1676 int i, j;
1677
1678 i = 0;
1679 if (striptype != RIGHTSTRIP) {
1680 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1681 i++;
1682 }
1683 }
1684
1685 j = len;
1686 if (striptype != LEFTSTRIP) {
1687 do {
1688 j--;
1689 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1690 j++;
1691 }
1692
1693 if (i == 0 && j == len && PyString_CheckExact(self)) {
1694 Py_INCREF(self);
1695 return (PyObject*)self;
1696 }
1697 else
1698 return PyString_FromStringAndSize(s+i, j-i);
1699}
1700
1701
1702static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001703do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704{
1705 char *s = PyString_AS_STRING(self);
1706 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001708 i = 0;
1709 if (striptype != RIGHTSTRIP) {
1710 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1711 i++;
1712 }
1713 }
1714
1715 j = len;
1716 if (striptype != LEFTSTRIP) {
1717 do {
1718 j--;
1719 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1720 j++;
1721 }
1722
Tim Peters8fa5dd02001-09-12 02:18:30 +00001723 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 Py_INCREF(self);
1725 return (PyObject*)self;
1726 }
1727 else
1728 return PyString_FromStringAndSize(s+i, j-i);
1729}
1730
1731
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001732static PyObject *
1733do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1734{
1735 PyObject *sep = NULL;
1736
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001737 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001738 return NULL;
1739
1740 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001741 if (PyString_Check(sep))
1742 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001743#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001744 else if (PyUnicode_Check(sep)) {
1745 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1746 PyObject *res;
1747 if (uniself==NULL)
1748 return NULL;
1749 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1750 striptype, sep);
1751 Py_DECREF(uniself);
1752 return res;
1753 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001754#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001755 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001756 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001757#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001758 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001759#else
1760 "%s arg must be None or str",
1761#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001762 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001763 return NULL;
1764 }
1765 return do_xstrip(self, striptype, sep);
1766 }
1767
1768 return do_strip(self, striptype);
1769}
1770
1771
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001772PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001773"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774\n\
1775Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001776whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001777If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001778If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779
1780static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001781string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001783 if (PyTuple_GET_SIZE(args) == 0)
1784 return do_strip(self, BOTHSTRIP); /* Common case */
1785 else
1786 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001791"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001793Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001794If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796
1797static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001798string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001800 if (PyTuple_GET_SIZE(args) == 0)
1801 return do_strip(self, LEFTSTRIP); /* Common case */
1802 else
1803 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804}
1805
1806
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001807PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001808"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001810Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001811If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001812If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813
1814static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001815string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001817 if (PyTuple_GET_SIZE(args) == 0)
1818 return do_strip(self, RIGHTSTRIP); /* Common case */
1819 else
1820 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821}
1822
1823
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001824PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825"S.lower() -> string\n\
1826\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001827Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828
1829static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001830string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831{
1832 char *s = PyString_AS_STRING(self), *s_new;
1833 int i, n = PyString_GET_SIZE(self);
1834 PyObject *new;
1835
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836 new = PyString_FromStringAndSize(NULL, n);
1837 if (new == NULL)
1838 return NULL;
1839 s_new = PyString_AsString(new);
1840 for (i = 0; i < n; i++) {
1841 int c = Py_CHARMASK(*s++);
1842 if (isupper(c)) {
1843 *s_new = tolower(c);
1844 } else
1845 *s_new = c;
1846 s_new++;
1847 }
1848 return new;
1849}
1850
1851
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001852PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853"S.upper() -> string\n\
1854\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001855Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856
1857static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001858string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001859{
1860 char *s = PyString_AS_STRING(self), *s_new;
1861 int i, n = PyString_GET_SIZE(self);
1862 PyObject *new;
1863
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864 new = PyString_FromStringAndSize(NULL, n);
1865 if (new == NULL)
1866 return NULL;
1867 s_new = PyString_AsString(new);
1868 for (i = 0; i < n; i++) {
1869 int c = Py_CHARMASK(*s++);
1870 if (islower(c)) {
1871 *s_new = toupper(c);
1872 } else
1873 *s_new = c;
1874 s_new++;
1875 }
1876 return new;
1877}
1878
1879
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001880PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881"S.title() -> string\n\
1882\n\
1883Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001884characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885
1886static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001887string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001888{
1889 char *s = PyString_AS_STRING(self), *s_new;
1890 int i, n = PyString_GET_SIZE(self);
1891 int previous_is_cased = 0;
1892 PyObject *new;
1893
Guido van Rossum4c08d552000-03-10 22:55:18 +00001894 new = PyString_FromStringAndSize(NULL, n);
1895 if (new == NULL)
1896 return NULL;
1897 s_new = PyString_AsString(new);
1898 for (i = 0; i < n; i++) {
1899 int c = Py_CHARMASK(*s++);
1900 if (islower(c)) {
1901 if (!previous_is_cased)
1902 c = toupper(c);
1903 previous_is_cased = 1;
1904 } else if (isupper(c)) {
1905 if (previous_is_cased)
1906 c = tolower(c);
1907 previous_is_cased = 1;
1908 } else
1909 previous_is_cased = 0;
1910 *s_new++ = c;
1911 }
1912 return new;
1913}
1914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001915PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916"S.capitalize() -> string\n\
1917\n\
1918Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001919capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920
1921static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001922string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923{
1924 char *s = PyString_AS_STRING(self), *s_new;
1925 int i, n = PyString_GET_SIZE(self);
1926 PyObject *new;
1927
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928 new = PyString_FromStringAndSize(NULL, n);
1929 if (new == NULL)
1930 return NULL;
1931 s_new = PyString_AsString(new);
1932 if (0 < n) {
1933 int c = Py_CHARMASK(*s++);
1934 if (islower(c))
1935 *s_new = toupper(c);
1936 else
1937 *s_new = c;
1938 s_new++;
1939 }
1940 for (i = 1; i < n; i++) {
1941 int c = Py_CHARMASK(*s++);
1942 if (isupper(c))
1943 *s_new = tolower(c);
1944 else
1945 *s_new = c;
1946 s_new++;
1947 }
1948 return new;
1949}
1950
1951
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001952PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953"S.count(sub[, start[, end]]) -> int\n\
1954\n\
1955Return the number of occurrences of substring sub in string\n\
1956S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001957interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958
1959static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001960string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001961{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963 int len = PyString_GET_SIZE(self), n;
1964 int i = 0, last = INT_MAX;
1965 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001966 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967
Guido van Rossumc6821402000-05-08 14:08:05 +00001968 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1969 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001971
Guido van Rossum4c08d552000-03-10 22:55:18 +00001972 if (PyString_Check(subobj)) {
1973 sub = PyString_AS_STRING(subobj);
1974 n = PyString_GET_SIZE(subobj);
1975 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001976#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001977 else if (PyUnicode_Check(subobj)) {
1978 int count;
1979 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1980 if (count == -1)
1981 return NULL;
1982 else
1983 return PyInt_FromLong((long) count);
1984 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001985#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001986 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1987 return NULL;
1988
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001989 string_adjust_indices(&i, &last, len);
1990
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991 m = last + 1 - n;
1992 if (n == 0)
1993 return PyInt_FromLong((long) (m-i));
1994
1995 r = 0;
1996 while (i < m) {
1997 if (!memcmp(s+i, sub, n)) {
1998 r++;
1999 i += n;
2000 } else {
2001 i++;
2002 }
2003 }
2004 return PyInt_FromLong((long) r);
2005}
2006
2007
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002008PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009"S.swapcase() -> string\n\
2010\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002012converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013
2014static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002015string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016{
2017 char *s = PyString_AS_STRING(self), *s_new;
2018 int i, n = PyString_GET_SIZE(self);
2019 PyObject *new;
2020
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 new = PyString_FromStringAndSize(NULL, n);
2022 if (new == NULL)
2023 return NULL;
2024 s_new = PyString_AsString(new);
2025 for (i = 0; i < n; i++) {
2026 int c = Py_CHARMASK(*s++);
2027 if (islower(c)) {
2028 *s_new = toupper(c);
2029 }
2030 else if (isupper(c)) {
2031 *s_new = tolower(c);
2032 }
2033 else
2034 *s_new = c;
2035 s_new++;
2036 }
2037 return new;
2038}
2039
2040
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002041PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042"S.translate(table [,deletechars]) -> string\n\
2043\n\
2044Return a copy of the string S, where all characters occurring\n\
2045in the optional argument deletechars are removed, and the\n\
2046remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002047translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048
2049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002050string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002052 register char *input, *output;
2053 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054 register int i, c, changed = 0;
2055 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002056 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057 int inlen, tablen, dellen = 0;
2058 PyObject *result;
2059 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002060 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002062 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065
2066 if (PyString_Check(tableobj)) {
2067 table1 = PyString_AS_STRING(tableobj);
2068 tablen = PyString_GET_SIZE(tableobj);
2069 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002070#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002072 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002073 parameter; instead a mapping to None will cause characters
2074 to be deleted. */
2075 if (delobj != NULL) {
2076 PyErr_SetString(PyExc_TypeError,
2077 "deletions are implemented differently for unicode");
2078 return NULL;
2079 }
2080 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2081 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002082#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002083 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085
Martin v. Löwis00b61272002-12-12 20:03:19 +00002086 if (tablen != 256) {
2087 PyErr_SetString(PyExc_ValueError,
2088 "translation table must be 256 characters long");
2089 return NULL;
2090 }
2091
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 if (delobj != NULL) {
2093 if (PyString_Check(delobj)) {
2094 del_table = PyString_AS_STRING(delobj);
2095 dellen = PyString_GET_SIZE(delobj);
2096 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002097#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 else if (PyUnicode_Check(delobj)) {
2099 PyErr_SetString(PyExc_TypeError,
2100 "deletions are implemented differently for unicode");
2101 return NULL;
2102 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002103#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002104 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2105 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106 }
2107 else {
2108 del_table = NULL;
2109 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 }
2111
2112 table = table1;
2113 inlen = PyString_Size(input_obj);
2114 result = PyString_FromStringAndSize((char *)NULL, inlen);
2115 if (result == NULL)
2116 return NULL;
2117 output_start = output = PyString_AsString(result);
2118 input = PyString_AsString(input_obj);
2119
2120 if (dellen == 0) {
2121 /* If no deletions are required, use faster code */
2122 for (i = inlen; --i >= 0; ) {
2123 c = Py_CHARMASK(*input++);
2124 if (Py_CHARMASK((*output++ = table[c])) != c)
2125 changed = 1;
2126 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002127 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 return result;
2129 Py_DECREF(result);
2130 Py_INCREF(input_obj);
2131 return input_obj;
2132 }
2133
2134 for (i = 0; i < 256; i++)
2135 trans_table[i] = Py_CHARMASK(table[i]);
2136
2137 for (i = 0; i < dellen; i++)
2138 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2139
2140 for (i = inlen; --i >= 0; ) {
2141 c = Py_CHARMASK(*input++);
2142 if (trans_table[c] != -1)
2143 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2144 continue;
2145 changed = 1;
2146 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002147 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148 Py_DECREF(result);
2149 Py_INCREF(input_obj);
2150 return input_obj;
2151 }
2152 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002153 if (inlen > 0)
2154 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155 return result;
2156}
2157
2158
2159/* What follows is used for implementing replace(). Perry Stoll. */
2160
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory (embedded NUL
  bytes are allowed).

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM.  Returns the index into MEM of the match, or -1 if
  there is none.  A pattern longer than MEM never matches.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start in the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    const char *cursor = mem;
    int pos;

    for (pos = 0; pos <= last_start; pos++, cursor++) {
        /* Cheap first-byte test before the full comparison. */
        if (*cursor != pat[0])
            continue;
        if (memcmp(cursor, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
2186
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right with mymemfind() and resuming after each match:
      mem=1111  and pat=11 gives 2;
      mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        const int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* Continue just past the end of this match. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
2210
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a negative
   COUNT means no limit.  An empty PAT is treated as occurring before
   every byte of STR and once more at its end (LEN + 1 places).

   The original string is returned unchanged when STR is empty, when PAT
   is longer than STR, when both PAT and SUB are empty, or when no
   replacement takes place (no occurrence, or COUNT is 0).  Otherwise a
   new string is allocated here with PyMem_MALLOC and returned; it is NOT
   NUL-terminated unless its length is 0.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       STR itself (see above), or
       NULL if an error occurred (out of memory, or the length of the
       result would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
    if (count >= 0 && nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* The result only grows when SUB is longer than PAT.  Reject sizes
       that would overflow an int before computing them: a wrapped
       new_len would lead to an undersized buffer being overrun below. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        if (pat_len > 0) {
            for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                if (offset == -1)
                    break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset);
                str += offset + pat_len;
                len -= offset + pat_len;

                /* copy substitute into the output string */
                new_s += offset;
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
            }
            /* copy any remaining values into output string */
            if (len > 0)
                memcpy(new_s, str, len);
        }
        else {
            /* Empty pattern: emit SUB, then one input byte, and repeat;
               after the last SUB copy the rest of the input verbatim. */
            for (;;++str, --len) {
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
                if (--nfound <= 0) {
                    memcpy(new_s, str, len);
                    break;
                }
                *new_s++ = *str;
            }
        }
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2308
2309
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002310PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002311"S.replace (old, new[, maxsplit]) -> string\n\
2312\n\
2313Return a copy of string S with all occurrences of substring\n\
2314old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002315given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
2317static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002318string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 const char *str = PyString_AS_STRING(self), *sub, *repl;
2321 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002322 const int len = PyString_GET_SIZE(self);
2323 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328 if (!PyArg_ParseTuple(args, "OO|i:replace",
2329 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331
2332 if (PyString_Check(subobj)) {
2333 sub = PyString_AS_STRING(subobj);
2334 sub_len = PyString_GET_SIZE(subobj);
2335 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002336#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002338 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002340#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2342 return NULL;
2343
2344 if (PyString_Check(replobj)) {
2345 repl = PyString_AS_STRING(replobj);
2346 repl_len = PyString_GET_SIZE(replobj);
2347 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002348#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002350 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002352#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002353 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2354 return NULL;
2355
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 if (new_s == NULL) {
2358 PyErr_NoMemory();
2359 return NULL;
2360 }
2361 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002362 if (PyString_CheckExact(self)) {
2363 /* we're returning another reference to self */
2364 new = (PyObject*)self;
2365 Py_INCREF(new);
2366 }
2367 else {
2368 new = PyString_FromStringAndSize(str, len);
2369 if (new == NULL)
2370 return NULL;
2371 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 }
2373 else {
2374 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002375 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376 }
2377 return new;
2378}
2379
2380
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002381PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002382"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002384Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002386comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002387
2388static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002389string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002390{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002392 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394 int plen;
2395 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002396 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398
Guido van Rossumc6821402000-05-08 14:08:05 +00002399 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2400 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401 return NULL;
2402 if (PyString_Check(subobj)) {
2403 prefix = PyString_AS_STRING(subobj);
2404 plen = PyString_GET_SIZE(subobj);
2405 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002406#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002407 else if (PyUnicode_Check(subobj)) {
2408 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002409 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002410 subobj, start, end, -1);
2411 if (rc == -1)
2412 return NULL;
2413 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002414 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002415 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002416#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002417 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 return NULL;
2419
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002420 string_adjust_indices(&start, &end, len);
2421
2422 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002423 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002425 if (end-start >= plen)
2426 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2427 else
2428 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429}
2430
2431
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002432PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002433"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002435Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002436optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002437comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438
2439static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002440string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 const char* suffix;
2445 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002447 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449
Guido van Rossumc6821402000-05-08 14:08:05 +00002450 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2451 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002452 return NULL;
2453 if (PyString_Check(subobj)) {
2454 suffix = PyString_AS_STRING(subobj);
2455 slen = PyString_GET_SIZE(subobj);
2456 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002457#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002458 else if (PyUnicode_Check(subobj)) {
2459 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002460 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002461 subobj, start, end, +1);
2462 if (rc == -1)
2463 return NULL;
2464 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002465 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002467#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002468 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469 return NULL;
2470
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002471 string_adjust_indices(&start, &end, len);
2472
2473 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002474 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002476 if (end-slen > start)
2477 start = end - slen;
2478 if (end-start >= slen)
2479 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2480 else
2481 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482}
2483
2484
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002485PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002486"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002487\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002488Encodes S using the codec registered for encoding. encoding defaults\n\
2489to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002490handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002491a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2492'xmlcharrefreplace' as well as any other name registered with\n\
2493codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002494
2495static PyObject *
2496string_encode(PyStringObject *self, PyObject *args)
2497{
2498 char *encoding = NULL;
2499 char *errors = NULL;
2500 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2501 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002502 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2503}
2504
2505
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002506PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002507"S.decode([encoding[,errors]]) -> object\n\
2508\n\
2509Decodes S using the codec registered for encoding. encoding defaults\n\
2510to the default encoding. errors may be given to set a different error\n\
2511handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002512a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2513as well as any other name registerd with codecs.register_error that is\n\
2514able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002515
2516static PyObject *
2517string_decode(PyStringObject *self, PyObject *args)
2518{
2519 char *encoding = NULL;
2520 char *errors = NULL;
2521 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2522 return NULL;
2523 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002524}
2525
2526
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002527PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528"S.expandtabs([tabsize]) -> string\n\
2529\n\
2530Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002531If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532
2533static PyObject*
2534string_expandtabs(PyStringObject *self, PyObject *args)
2535{
2536 const char *e, *p;
2537 char *q;
2538 int i, j;
2539 PyObject *u;
2540 int tabsize = 8;
2541
2542 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2543 return NULL;
2544
Thomas Wouters7e474022000-07-16 12:04:32 +00002545 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546 i = j = 0;
2547 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2548 for (p = PyString_AS_STRING(self); p < e; p++)
2549 if (*p == '\t') {
2550 if (tabsize > 0)
2551 j += tabsize - (j % tabsize);
2552 }
2553 else {
2554 j++;
2555 if (*p == '\n' || *p == '\r') {
2556 i += j;
2557 j = 0;
2558 }
2559 }
2560
2561 /* Second pass: create output string and fill it */
2562 u = PyString_FromStringAndSize(NULL, i + j);
2563 if (!u)
2564 return NULL;
2565
2566 j = 0;
2567 q = PyString_AS_STRING(u);
2568
2569 for (p = PyString_AS_STRING(self); p < e; p++)
2570 if (*p == '\t') {
2571 if (tabsize > 0) {
2572 i = tabsize - (j % tabsize);
2573 j += i;
2574 while (i--)
2575 *q++ = ' ';
2576 }
2577 }
2578 else {
2579 j++;
2580 *q++ = *p;
2581 if (*p == '\n' || *p == '\r')
2582 j = 0;
2583 }
2584
2585 return u;
2586}
2587
Tim Peters8fa5dd02001-09-12 02:18:30 +00002588static PyObject *
2589pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590{
2591 PyObject *u;
2592
2593 if (left < 0)
2594 left = 0;
2595 if (right < 0)
2596 right = 0;
2597
Tim Peters8fa5dd02001-09-12 02:18:30 +00002598 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 Py_INCREF(self);
2600 return (PyObject *)self;
2601 }
2602
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002603 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 left + PyString_GET_SIZE(self) + right);
2605 if (u) {
2606 if (left)
2607 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002608 memcpy(PyString_AS_STRING(u) + left,
2609 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002610 PyString_GET_SIZE(self));
2611 if (right)
2612 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2613 fill, right);
2614 }
2615
2616 return u;
2617}
2618
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002619PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002620"S.ljust(width) -> string\n"
2621"\n"
2622"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002623"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624
2625static PyObject *
2626string_ljust(PyStringObject *self, PyObject *args)
2627{
2628 int width;
2629 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2630 return NULL;
2631
Tim Peters8fa5dd02001-09-12 02:18:30 +00002632 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633 Py_INCREF(self);
2634 return (PyObject*) self;
2635 }
2636
2637 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2638}
2639
2640
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002641PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002642"S.rjust(width) -> string\n"
2643"\n"
2644"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002645"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646
2647static PyObject *
2648string_rjust(PyStringObject *self, PyObject *args)
2649{
2650 int width;
2651 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2652 return NULL;
2653
Tim Peters8fa5dd02001-09-12 02:18:30 +00002654 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655 Py_INCREF(self);
2656 return (PyObject*) self;
2657 }
2658
2659 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2660}
2661
2662
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002663PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002664"S.center(width) -> string\n"
2665"\n"
2666"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002667"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002668
2669static PyObject *
2670string_center(PyStringObject *self, PyObject *args)
2671{
2672 int marg, left;
2673 int width;
2674
2675 if (!PyArg_ParseTuple(args, "i:center", &width))
2676 return NULL;
2677
Tim Peters8fa5dd02001-09-12 02:18:30 +00002678 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002679 Py_INCREF(self);
2680 return (PyObject*) self;
2681 }
2682
2683 marg = width - PyString_GET_SIZE(self);
2684 left = marg / 2 + (marg & width & 1);
2685
2686 return pad(self, left, marg - left, ' ');
2687}
2688
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002689PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002690"S.zfill(width) -> string\n"
2691"\n"
2692"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002693"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002694
2695static PyObject *
2696string_zfill(PyStringObject *self, PyObject *args)
2697{
2698 int fill;
2699 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002700 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002701
2702 int width;
2703 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2704 return NULL;
2705
2706 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002707 if (PyString_CheckExact(self)) {
2708 Py_INCREF(self);
2709 return (PyObject*) self;
2710 }
2711 else
2712 return PyString_FromStringAndSize(
2713 PyString_AS_STRING(self),
2714 PyString_GET_SIZE(self)
2715 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002716 }
2717
2718 fill = width - PyString_GET_SIZE(self);
2719
2720 s = pad(self, fill, 0, '0');
2721
2722 if (s == NULL)
2723 return NULL;
2724
2725 p = PyString_AS_STRING(s);
2726 if (p[fill] == '+' || p[fill] == '-') {
2727 /* move sign to beginning of string */
2728 p[0] = p[fill];
2729 p[fill] = '0';
2730 }
2731
2732 return (PyObject*) s;
2733}
2734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002735PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002736"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002737"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002738"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002739"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002740
2741static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002742string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002743{
Fred Drakeba096332000-07-09 07:04:36 +00002744 register const unsigned char *p
2745 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002746 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002747
Guido van Rossum4c08d552000-03-10 22:55:18 +00002748 /* Shortcut for single character strings */
2749 if (PyString_GET_SIZE(self) == 1 &&
2750 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002751 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002752
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002753 /* Special case for empty strings */
2754 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002755 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002756
Guido van Rossum4c08d552000-03-10 22:55:18 +00002757 e = p + PyString_GET_SIZE(self);
2758 for (; p < e; p++) {
2759 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002760 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002761 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002762 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002763}
2764
2765
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002766PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002767"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002768\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002769Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002770and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002771
2772static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002773string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002774{
Fred Drakeba096332000-07-09 07:04:36 +00002775 register const unsigned char *p
2776 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002777 register const unsigned char *e;
2778
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002779 /* Shortcut for single character strings */
2780 if (PyString_GET_SIZE(self) == 1 &&
2781 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002782 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002783
2784 /* Special case for empty strings */
2785 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002786 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002787
2788 e = p + PyString_GET_SIZE(self);
2789 for (; p < e; p++) {
2790 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002791 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002792 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002793 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002794}
2795
2796
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002797PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002798"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002799\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002800Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002801and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002802
2803static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002804string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002805{
Fred Drakeba096332000-07-09 07:04:36 +00002806 register const unsigned char *p
2807 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002808 register const unsigned char *e;
2809
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002810 /* Shortcut for single character strings */
2811 if (PyString_GET_SIZE(self) == 1 &&
2812 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002813 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002814
2815 /* Special case for empty strings */
2816 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002817 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002818
2819 e = p + PyString_GET_SIZE(self);
2820 for (; p < e; p++) {
2821 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002822 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002823 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002824 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002825}
2826
2827
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002828PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002829"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002830\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002831Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002832False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002833
2834static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002835string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002836{
Fred Drakeba096332000-07-09 07:04:36 +00002837 register const unsigned char *p
2838 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002839 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002840
Guido van Rossum4c08d552000-03-10 22:55:18 +00002841 /* Shortcut for single character strings */
2842 if (PyString_GET_SIZE(self) == 1 &&
2843 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002844 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002846 /* Special case for empty strings */
2847 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002848 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002849
Guido van Rossum4c08d552000-03-10 22:55:18 +00002850 e = p + PyString_GET_SIZE(self);
2851 for (; p < e; p++) {
2852 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002853 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002854 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002855 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002856}
2857
2858
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002859PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002860"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002862Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002863at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002864
2865static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002866string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867{
Fred Drakeba096332000-07-09 07:04:36 +00002868 register const unsigned char *p
2869 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002870 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871 int cased;
2872
Guido van Rossum4c08d552000-03-10 22:55:18 +00002873 /* Shortcut for single character strings */
2874 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002875 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002877 /* Special case for empty strings */
2878 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002879 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002880
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881 e = p + PyString_GET_SIZE(self);
2882 cased = 0;
2883 for (; p < e; p++) {
2884 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002885 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886 else if (!cased && islower(*p))
2887 cased = 1;
2888 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002889 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002890}
2891
2892
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002893PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002894"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002895\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002896Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002897at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002898
2899static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002900string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002901{
Fred Drakeba096332000-07-09 07:04:36 +00002902 register const unsigned char *p
2903 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002904 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002905 int cased;
2906
Guido van Rossum4c08d552000-03-10 22:55:18 +00002907 /* Shortcut for single character strings */
2908 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002909 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002910
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002911 /* Special case for empty strings */
2912 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002913 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002914
Guido van Rossum4c08d552000-03-10 22:55:18 +00002915 e = p + PyString_GET_SIZE(self);
2916 cased = 0;
2917 for (; p < e; p++) {
2918 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002919 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002920 else if (!cased && isupper(*p))
2921 cased = 1;
2922 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002923 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002924}
2925
2926
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002927PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002928"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002930Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002931may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002932ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002933
2934static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002935string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002936{
Fred Drakeba096332000-07-09 07:04:36 +00002937 register const unsigned char *p
2938 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002939 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002940 int cased, previous_is_cased;
2941
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942 /* Shortcut for single character strings */
2943 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002944 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002945
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002946 /* Special case for empty strings */
2947 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002948 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002949
Guido van Rossum4c08d552000-03-10 22:55:18 +00002950 e = p + PyString_GET_SIZE(self);
2951 cased = 0;
2952 previous_is_cased = 0;
2953 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002954 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002955
2956 if (isupper(ch)) {
2957 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002958 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002959 previous_is_cased = 1;
2960 cased = 1;
2961 }
2962 else if (islower(ch)) {
2963 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002964 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002965 previous_is_cased = 1;
2966 cased = 1;
2967 }
2968 else
2969 previous_is_cased = 0;
2970 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002971 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002972}
2973
2974
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002975PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002976"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002977\n\
2978Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002979Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002980is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002981
2982#define SPLIT_APPEND(data, left, right) \
2983 str = PyString_FromStringAndSize(data + left, right - left); \
2984 if (!str) \
2985 goto onError; \
2986 if (PyList_Append(list, str)) { \
2987 Py_DECREF(str); \
2988 goto onError; \
2989 } \
2990 else \
2991 Py_DECREF(str);
2992
2993static PyObject*
2994string_splitlines(PyStringObject *self, PyObject *args)
2995{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002996 register int i;
2997 register int j;
2998 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002999 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003000 PyObject *list;
3001 PyObject *str;
3002 char *data;
3003
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003004 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003005 return NULL;
3006
3007 data = PyString_AS_STRING(self);
3008 len = PyString_GET_SIZE(self);
3009
Guido van Rossum4c08d552000-03-10 22:55:18 +00003010 list = PyList_New(0);
3011 if (!list)
3012 goto onError;
3013
3014 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003015 int eol;
3016
Guido van Rossum4c08d552000-03-10 22:55:18 +00003017 /* Find a line and append it */
3018 while (i < len && data[i] != '\n' && data[i] != '\r')
3019 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003020
3021 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003022 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003023 if (i < len) {
3024 if (data[i] == '\r' && i + 1 < len &&
3025 data[i+1] == '\n')
3026 i += 2;
3027 else
3028 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003029 if (keepends)
3030 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003031 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003032 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003033 j = i;
3034 }
3035 if (j < len) {
3036 SPLIT_APPEND(data, j, len);
3037 }
3038
3039 return list;
3040
3041 onError:
3042 Py_DECREF(list);
3043 return NULL;
3044}
3045
3046#undef SPLIT_APPEND
3047
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003048static PyObject *
3049string_getnewargs(PyStringObject *v)
3050{
3051 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3052}
3053
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003055static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003057 /* Counterparts of the obsolete stropmodule functions; except
3058 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003059 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3060 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3061 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3062 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003063 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3064 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3065 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3066 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3067 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3068 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3069 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003070 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3071 capitalize__doc__},
3072 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3073 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3074 endswith__doc__},
3075 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3076 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3077 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3078 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3079 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3080 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3081 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3082 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3083 startswith__doc__},
3084 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3085 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3086 swapcase__doc__},
3087 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3088 translate__doc__},
3089 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3090 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3091 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3092 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3093 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3094 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3095 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3096 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3097 expandtabs__doc__},
3098 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3099 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003100 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003101 {NULL, NULL} /* sentinel */
3102};
3103
Jeremy Hylton938ace62002-07-17 16:30:39 +00003104static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003105str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3106
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003107static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003108string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003109{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003110 PyObject *x = NULL;
3111 static char *kwlist[] = {"object", 0};
3112
Guido van Rossumae960af2001-08-30 03:11:59 +00003113 if (type != &PyString_Type)
3114 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003115 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3116 return NULL;
3117 if (x == NULL)
3118 return PyString_FromString("");
3119 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003120}
3121
Guido van Rossumae960af2001-08-30 03:11:59 +00003122static PyObject *
3123str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3124{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003125 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003126 int n;
3127
3128 assert(PyType_IsSubtype(type, &PyString_Type));
3129 tmp = string_new(&PyString_Type, args, kwds);
3130 if (tmp == NULL)
3131 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003132 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003133 n = PyString_GET_SIZE(tmp);
3134 pnew = type->tp_alloc(type, n);
3135 if (pnew != NULL) {
3136 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003137 ((PyStringObject *)pnew)->ob_shash =
3138 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003139 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003140 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003141 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003142 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003143}
3144
Guido van Rossumcacfc072002-05-24 19:01:59 +00003145static PyObject *
3146basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3147{
3148 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003149 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003150 return NULL;
3151}
3152
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003153static PyObject *
3154string_mod(PyObject *v, PyObject *w)
3155{
3156 if (!PyString_Check(v)) {
3157 Py_INCREF(Py_NotImplemented);
3158 return Py_NotImplemented;
3159 }
3160 return PyString_Format(v, w);
3161}
3162
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003163PyDoc_STRVAR(basestring_doc,
3164"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003165
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003166static PyNumberMethods string_as_number = {
3167 0, /*nb_add*/
3168 0, /*nb_subtract*/
3169 0, /*nb_multiply*/
3170 0, /*nb_divide*/
3171 string_mod, /*nb_remainder*/
3172};
3173
3174
Guido van Rossumcacfc072002-05-24 19:01:59 +00003175PyTypeObject PyBaseString_Type = {
3176 PyObject_HEAD_INIT(&PyType_Type)
3177 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003178 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003179 0,
3180 0,
3181 0, /* tp_dealloc */
3182 0, /* tp_print */
3183 0, /* tp_getattr */
3184 0, /* tp_setattr */
3185 0, /* tp_compare */
3186 0, /* tp_repr */
3187 0, /* tp_as_number */
3188 0, /* tp_as_sequence */
3189 0, /* tp_as_mapping */
3190 0, /* tp_hash */
3191 0, /* tp_call */
3192 0, /* tp_str */
3193 0, /* tp_getattro */
3194 0, /* tp_setattro */
3195 0, /* tp_as_buffer */
3196 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3197 basestring_doc, /* tp_doc */
3198 0, /* tp_traverse */
3199 0, /* tp_clear */
3200 0, /* tp_richcompare */
3201 0, /* tp_weaklistoffset */
3202 0, /* tp_iter */
3203 0, /* tp_iternext */
3204 0, /* tp_methods */
3205 0, /* tp_members */
3206 0, /* tp_getset */
3207 &PyBaseObject_Type, /* tp_base */
3208 0, /* tp_dict */
3209 0, /* tp_descr_get */
3210 0, /* tp_descr_set */
3211 0, /* tp_dictoffset */
3212 0, /* tp_init */
3213 0, /* tp_alloc */
3214 basestring_new, /* tp_new */
3215 0, /* tp_free */
3216};
3217
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003218PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003219"str(object) -> string\n\
3220\n\
3221Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003222If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003223
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003224PyTypeObject PyString_Type = {
3225 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003226 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003227 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003228 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003229 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003230 (destructor)string_dealloc, /* tp_dealloc */
3231 (printfunc)string_print, /* tp_print */
3232 0, /* tp_getattr */
3233 0, /* tp_setattr */
3234 0, /* tp_compare */
3235 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003236 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003237 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003238 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003239 (hashfunc)string_hash, /* tp_hash */
3240 0, /* tp_call */
3241 (reprfunc)string_str, /* tp_str */
3242 PyObject_GenericGetAttr, /* tp_getattro */
3243 0, /* tp_setattro */
3244 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003245 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3246 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003247 string_doc, /* tp_doc */
3248 0, /* tp_traverse */
3249 0, /* tp_clear */
3250 (richcmpfunc)string_richcompare, /* tp_richcompare */
3251 0, /* tp_weaklistoffset */
3252 0, /* tp_iter */
3253 0, /* tp_iternext */
3254 string_methods, /* tp_methods */
3255 0, /* tp_members */
3256 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003257 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003258 0, /* tp_dict */
3259 0, /* tp_descr_get */
3260 0, /* tp_descr_set */
3261 0, /* tp_dictoffset */
3262 0, /* tp_init */
3263 0, /* tp_alloc */
3264 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003265 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003266};
3267
3268void
Fred Drakeba096332000-07-09 07:04:36 +00003269PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003270{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003271 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003272 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003273 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003274 if (w == NULL || !PyString_Check(*pv)) {
3275 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003276 *pv = NULL;
3277 return;
3278 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003279 v = string_concat((PyStringObject *) *pv, w);
3280 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003281 *pv = v;
3282}
3283
Guido van Rossum013142a1994-08-30 08:19:36 +00003284void
Fred Drakeba096332000-07-09 07:04:36 +00003285PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003286{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003287 PyString_Concat(pv, w);
3288 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003289}
3290
3291
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003292/* The following function breaks the notion that strings are immutable:
3293 it changes the size of a string. We get away with this only if there
3294 is only one module referencing the object. You can also think of it
3295 as creating a new string object and destroying the old one, only
3296 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003297 already be known to some other part of the code...
3298 Note that if there's not enough memory to resize the string, the original
3299 string object at *pv is deallocated, *pv is set to NULL, an "out of
3300 memory" exception is set, and -1 is returned. Else (on success) 0 is
3301 returned, and the value in *pv may or may not be the same as on input.
3302 As always, an extra byte is allocated for a trailing \0 byte (newsize
3303 does *not* include that), and a trailing \0 byte is stored.
3304*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003305
3306int
Fred Drakeba096332000-07-09 07:04:36 +00003307_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003308{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003309 register PyObject *v;
3310 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003311 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003312 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003313 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003314 Py_DECREF(v);
3315 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003316 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003317 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003318 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003319 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003320 _Py_ForgetReference(v);
3321 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003322 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003323 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003324 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003325 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003326 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003327 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003328 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003329 _Py_NewReference(*pv);
3330 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003331 sv->ob_size = newsize;
3332 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003333 return 0;
3334}
Guido van Rossume5372401993-03-16 12:15:04 +00003335
3336/* Helpers for formatstring */
3337
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003338static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003339getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003340{
3341 int argidx = *p_argidx;
3342 if (argidx < arglen) {
3343 (*p_argidx)++;
3344 if (arglen < 0)
3345 return args;
3346 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003347 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003348 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003349 PyErr_SetString(PyExc_TypeError,
3350 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003351 return NULL;
3352}
3353
/* Format flag bits, one per printf-style flag character.  They are
 * combined with bitwise OR into a single `flags' int.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
3366
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003367static int
Fred Drakeba096332000-07-09 07:04:36 +00003368formatfloat(char *buf, size_t buflen, int flags,
3369 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003370{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003371 /* fmt = '%#.' + `prec` + `type`
3372 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003373 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003374 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003375 x = PyFloat_AsDouble(v);
3376 if (x == -1.0 && PyErr_Occurred()) {
3377 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003378 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003379 }
Guido van Rossume5372401993-03-16 12:15:04 +00003380 if (prec < 0)
3381 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003382 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3383 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003384 /* Worst case length calc to ensure no buffer overrun:
3385
3386 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003387 fmt = %#.<prec>g
3388 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003389 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003390 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003391
3392 'f' formats:
3393 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3394 len = 1 + 50 + 1 + prec = 52 + prec
3395
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003396 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003397 always given), therefore increase the length by one.
3398
3399 */
3400 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3401 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003402 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003403 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003404 return -1;
3405 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003406 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3407 (flags&F_ALT) ? "#" : "",
3408 prec, type);
Tim Peters885d4572001-11-28 20:27:42 +00003409 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003410 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003411}
3412
Tim Peters38fd5b62000-09-21 05:43:11 +00003413/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3414 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3415 * Python's regular ints.
3416 * Return value: a new PyString*, or NULL if error.
3417 * . *pbuf is set to point into it,
3418 * *plen set to the # of chars following that.
3419 * Caller must decref it when done using pbuf.
3420 * The string starting at *pbuf is of the form
3421 * "-"? ("0x" | "0X")? digit+
3422 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003423 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003424 * There will be at least prec digits, zero-filled on the left if
3425 * necessary to get that many.
3426 * val object to be converted
3427 * flags bitmask of format flags; only F_ALT is looked at
3428 * prec minimum number of digits; 0-fill on left if needed
3429 * type a character in [duoxX]; u acts the same as d
3430 *
3431 * CAUTION: o, x and X conversions on regular ints can never
3432 * produce a '-' sign, but can for Python's unbounded ints.
3433 */
3434PyObject*
3435_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3436 char **pbuf, int *plen)
3437{
3438 PyObject *result = NULL;
3439 char *buf;
3440 int i;
3441 int sign; /* 1 if '-', else 0 */
3442 int len; /* number of characters */
3443 int numdigits; /* len == numnondigits + numdigits */
3444 int numnondigits = 0;
3445
3446 switch (type) {
3447 case 'd':
3448 case 'u':
3449 result = val->ob_type->tp_str(val);
3450 break;
3451 case 'o':
3452 result = val->ob_type->tp_as_number->nb_oct(val);
3453 break;
3454 case 'x':
3455 case 'X':
3456 numnondigits = 2;
3457 result = val->ob_type->tp_as_number->nb_hex(val);
3458 break;
3459 default:
3460 assert(!"'type' not in [duoxX]");
3461 }
3462 if (!result)
3463 return NULL;
3464
3465 /* To modify the string in-place, there can only be one reference. */
3466 if (result->ob_refcnt != 1) {
3467 PyErr_BadInternalCall();
3468 return NULL;
3469 }
3470 buf = PyString_AsString(result);
3471 len = PyString_Size(result);
3472 if (buf[len-1] == 'L') {
3473 --len;
3474 buf[len] = '\0';
3475 }
3476 sign = buf[0] == '-';
3477 numnondigits += sign;
3478 numdigits = len - numnondigits;
3479 assert(numdigits > 0);
3480
Tim Petersfff53252001-04-12 18:38:48 +00003481 /* Get rid of base marker unless F_ALT */
3482 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003483 /* Need to skip 0x, 0X or 0. */
3484 int skipped = 0;
3485 switch (type) {
3486 case 'o':
3487 assert(buf[sign] == '0');
3488 /* If 0 is only digit, leave it alone. */
3489 if (numdigits > 1) {
3490 skipped = 1;
3491 --numdigits;
3492 }
3493 break;
3494 case 'x':
3495 case 'X':
3496 assert(buf[sign] == '0');
3497 assert(buf[sign + 1] == 'x');
3498 skipped = 2;
3499 numnondigits -= 2;
3500 break;
3501 }
3502 if (skipped) {
3503 buf += skipped;
3504 len -= skipped;
3505 if (sign)
3506 buf[0] = '-';
3507 }
3508 assert(len == numnondigits + numdigits);
3509 assert(numdigits > 0);
3510 }
3511
3512 /* Fill with leading zeroes to meet minimum width. */
3513 if (prec > numdigits) {
3514 PyObject *r1 = PyString_FromStringAndSize(NULL,
3515 numnondigits + prec);
3516 char *b1;
3517 if (!r1) {
3518 Py_DECREF(result);
3519 return NULL;
3520 }
3521 b1 = PyString_AS_STRING(r1);
3522 for (i = 0; i < numnondigits; ++i)
3523 *b1++ = *buf++;
3524 for (i = 0; i < prec - numdigits; i++)
3525 *b1++ = '0';
3526 for (i = 0; i < numdigits; i++)
3527 *b1++ = *buf++;
3528 *b1 = '\0';
3529 Py_DECREF(result);
3530 result = r1;
3531 buf = PyString_AS_STRING(result);
3532 len = numnondigits + prec;
3533 }
3534
3535 /* Fix up case for hex conversions. */
3536 switch (type) {
3537 case 'x':
3538 /* Need to convert all upper case letters to lower case. */
3539 for (i = 0; i < len; i++)
3540 if (buf[i] >= 'A' && buf[i] <= 'F')
3541 buf[i] += 'a'-'A';
3542 break;
3543 case 'X':
3544 /* Need to convert 0x to 0X (and -0x to -0X). */
3545 if (buf[sign + 1] == 'x')
3546 buf[sign + 1] = 'X';
3547 break;
3548 }
3549 *pbuf = buf;
3550 *plen = len;
3551 return result;
3552}
3553
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003554static int
Fred Drakeba096332000-07-09 07:04:36 +00003555formatint(char *buf, size_t buflen, int flags,
3556 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003557{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003558 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003559 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3560 + 1 + 1 = 24 */
3561 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003562 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003563
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003564 x = PyInt_AsLong(v);
3565 if (x == -1 && PyErr_Occurred()) {
3566 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003567 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003568 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003569 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003570 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003571 "%u/%o/%x/%X of negative int will return "
3572 "a signed string in Python 2.4 and up") < 0)
3573 return -1;
3574 }
Guido van Rossume5372401993-03-16 12:15:04 +00003575 if (prec < 0)
3576 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003577
3578 if ((flags & F_ALT) &&
3579 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003580 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003581 * of issues that cause pain:
3582 * - when 0 is being converted, the C standard leaves off
3583 * the '0x' or '0X', which is inconsistent with other
3584 * %#x/%#X conversions and inconsistent with Python's
3585 * hex() function
3586 * - there are platforms that violate the standard and
3587 * convert 0 with the '0x' or '0X'
3588 * (Metrowerks, Compaq Tru64)
3589 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003590 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003591 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003592 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003593 * We can achieve the desired consistency by inserting our
3594 * own '0x' or '0X' prefix, and substituting %x/%X in place
3595 * of %#x/%#X.
3596 *
3597 * Note that this is the same approach as used in
3598 * formatint() in unicodeobject.c
3599 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003600 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003601 type, prec, type);
3602 }
3603 else {
3604 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003605 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003606 prec, type);
3607 }
3608
Tim Peters38fd5b62000-09-21 05:43:11 +00003609 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003610 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3611 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003612 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003613 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003614 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003615 return -1;
3616 }
Tim Peters885d4572001-11-28 20:27:42 +00003617 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003618 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003619}
3620
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003621static int
Fred Drakeba096332000-07-09 07:04:36 +00003622formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003623{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003624 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003625 if (PyString_Check(v)) {
3626 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003627 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003628 }
3629 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003630 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003631 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003632 }
3633 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003634 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003635}
3636
Guido van Rossum013142a1994-08-30 08:19:36 +00003637
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003638/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3639
3640 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3641 chars are formatted. XXX This is a magic number. Each formatting
3642 routine does bounds checking to ensure no overflow, but a better
3643 solution may be to malloc a buffer of appropriate size for each
3644 format. For now, the current solution is sufficient.
3645*/
/* The expansion is fully parenthesized so that the cast cannot combine
   with neighbouring tokens at the point of use (e.g. `sizeof FORMATBUFLEN'). */
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003647
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003648PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003649PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003650{
3651 char *fmt, *res;
3652 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003653 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003654 PyObject *result, *orig_args;
3655#ifdef Py_USING_UNICODE
3656 PyObject *v, *w;
3657#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003658 PyObject *dict = NULL;
3659 if (format == NULL || !PyString_Check(format) || args == NULL) {
3660 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003661 return NULL;
3662 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003663 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003664 fmt = PyString_AS_STRING(format);
3665 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003666 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003667 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003668 if (result == NULL)
3669 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003670 res = PyString_AsString(result);
3671 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003672 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003673 argidx = 0;
3674 }
3675 else {
3676 arglen = -1;
3677 argidx = -2;
3678 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003679 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3680 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003681 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003682 while (--fmtcnt >= 0) {
3683 if (*fmt != '%') {
3684 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003685 rescnt = fmtcnt + 100;
3686 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003687 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003688 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003689 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003690 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003691 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003692 }
3693 *res++ = *fmt++;
3694 }
3695 else {
3696 /* Got a format specifier */
3697 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003698 int width = -1;
3699 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003700 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003701 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003702 PyObject *v = NULL;
3703 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003704 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003705 int sign;
3706 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003707 char formatbuf[FORMATBUFLEN];
3708 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003709#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003710 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003711 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003712#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003713
Guido van Rossumda9c2711996-12-05 21:58:58 +00003714 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003715 if (*fmt == '(') {
3716 char *keystart;
3717 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003718 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003719 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003720
3721 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003722 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003723 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003724 goto error;
3725 }
3726 ++fmt;
3727 --fmtcnt;
3728 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003729 /* Skip over balanced parentheses */
3730 while (pcount > 0 && --fmtcnt >= 0) {
3731 if (*fmt == ')')
3732 --pcount;
3733 else if (*fmt == '(')
3734 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003735 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003736 }
3737 keylen = fmt - keystart - 1;
3738 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003739 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003740 "incomplete format key");
3741 goto error;
3742 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003743 key = PyString_FromStringAndSize(keystart,
3744 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003745 if (key == NULL)
3746 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003747 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003748 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003749 args_owned = 0;
3750 }
3751 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003752 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003753 if (args == NULL) {
3754 goto error;
3755 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003756 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003757 arglen = -1;
3758 argidx = -2;
3759 }
Guido van Rossume5372401993-03-16 12:15:04 +00003760 while (--fmtcnt >= 0) {
3761 switch (c = *fmt++) {
3762 case '-': flags |= F_LJUST; continue;
3763 case '+': flags |= F_SIGN; continue;
3764 case ' ': flags |= F_BLANK; continue;
3765 case '#': flags |= F_ALT; continue;
3766 case '0': flags |= F_ZERO; continue;
3767 }
3768 break;
3769 }
3770 if (c == '*') {
3771 v = getnextarg(args, arglen, &argidx);
3772 if (v == NULL)
3773 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003774 if (!PyInt_Check(v)) {
3775 PyErr_SetString(PyExc_TypeError,
3776 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003777 goto error;
3778 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003779 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003780 if (width < 0) {
3781 flags |= F_LJUST;
3782 width = -width;
3783 }
Guido van Rossume5372401993-03-16 12:15:04 +00003784 if (--fmtcnt >= 0)
3785 c = *fmt++;
3786 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003787 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003788 width = c - '0';
3789 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003790 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003791 if (!isdigit(c))
3792 break;
3793 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003794 PyErr_SetString(
3795 PyExc_ValueError,
3796 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003797 goto error;
3798 }
3799 width = width*10 + (c - '0');
3800 }
3801 }
3802 if (c == '.') {
3803 prec = 0;
3804 if (--fmtcnt >= 0)
3805 c = *fmt++;
3806 if (c == '*') {
3807 v = getnextarg(args, arglen, &argidx);
3808 if (v == NULL)
3809 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003810 if (!PyInt_Check(v)) {
3811 PyErr_SetString(
3812 PyExc_TypeError,
3813 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003814 goto error;
3815 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003816 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003817 if (prec < 0)
3818 prec = 0;
3819 if (--fmtcnt >= 0)
3820 c = *fmt++;
3821 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003822 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003823 prec = c - '0';
3824 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003825 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003826 if (!isdigit(c))
3827 break;
3828 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003829 PyErr_SetString(
3830 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003831 "prec too big");
3832 goto error;
3833 }
3834 prec = prec*10 + (c - '0');
3835 }
3836 }
3837 } /* prec */
3838 if (fmtcnt >= 0) {
3839 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003840 if (--fmtcnt >= 0)
3841 c = *fmt++;
3842 }
3843 }
3844 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003845 PyErr_SetString(PyExc_ValueError,
3846 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003847 goto error;
3848 }
3849 if (c != '%') {
3850 v = getnextarg(args, arglen, &argidx);
3851 if (v == NULL)
3852 goto error;
3853 }
3854 sign = 0;
3855 fill = ' ';
3856 switch (c) {
3857 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003858 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003859 len = 1;
3860 break;
3861 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003862#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003863 if (PyUnicode_Check(v)) {
3864 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003865 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003866 goto unicode;
3867 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003868#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003869 /* Fall through */
3870 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003871 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003872 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003873 else
3874 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003875 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003876 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003877 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00003878 /* XXX Note: this should never happen,
3879 since PyObject_Repr() and
3880 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003881 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00003882 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003883 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003884 goto error;
3885 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003886 pbuf = PyString_AS_STRING(temp);
3887 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003888 if (prec >= 0 && len > prec)
3889 len = prec;
3890 break;
3891 case 'i':
3892 case 'd':
3893 case 'u':
3894 case 'o':
3895 case 'x':
3896 case 'X':
3897 if (c == 'i')
3898 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003899 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003900 temp = _PyString_FormatLong(v, flags,
3901 prec, c, &pbuf, &len);
3902 if (!temp)
3903 goto error;
3904 /* unbounded ints can always produce
3905 a sign character! */
3906 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003907 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003908 else {
3909 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003910 len = formatint(pbuf,
3911 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003912 flags, prec, c, v);
3913 if (len < 0)
3914 goto error;
3915 /* only d conversion is signed */
3916 sign = c == 'd';
3917 }
3918 if (flags & F_ZERO)
3919 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003920 break;
3921 case 'e':
3922 case 'E':
3923 case 'f':
3924 case 'g':
3925 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003926 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003927 len = formatfloat(pbuf, sizeof(formatbuf),
3928 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003929 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003930 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003931 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003932 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003933 fill = '0';
3934 break;
3935 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003936 pbuf = formatbuf;
3937 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003938 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003939 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003940 break;
3941 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003942 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003943 "unsupported format character '%c' (0x%x) "
3944 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00003945 c, c,
3946 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003947 goto error;
3948 }
3949 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003950 if (*pbuf == '-' || *pbuf == '+') {
3951 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003952 len--;
3953 }
3954 else if (flags & F_SIGN)
3955 sign = '+';
3956 else if (flags & F_BLANK)
3957 sign = ' ';
3958 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003959 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003960 }
3961 if (width < len)
3962 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003963 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003964 reslen -= rescnt;
3965 rescnt = width + fmtcnt + 100;
3966 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003967 if (reslen < 0) {
3968 Py_DECREF(result);
3969 return PyErr_NoMemory();
3970 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003971 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003972 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003973 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003974 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003975 }
3976 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003977 if (fill != ' ')
3978 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003979 rescnt--;
3980 if (width > len)
3981 width--;
3982 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003983 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3984 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003985 assert(pbuf[1] == c);
3986 if (fill != ' ') {
3987 *res++ = *pbuf++;
3988 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003989 }
Tim Petersfff53252001-04-12 18:38:48 +00003990 rescnt -= 2;
3991 width -= 2;
3992 if (width < 0)
3993 width = 0;
3994 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003995 }
3996 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003997 do {
3998 --rescnt;
3999 *res++ = fill;
4000 } while (--width > len);
4001 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004002 if (fill == ' ') {
4003 if (sign)
4004 *res++ = sign;
4005 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004006 (c == 'x' || c == 'X')) {
4007 assert(pbuf[0] == '0');
4008 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004009 *res++ = *pbuf++;
4010 *res++ = *pbuf++;
4011 }
4012 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004013 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004014 res += len;
4015 rescnt -= len;
4016 while (--width >= len) {
4017 --rescnt;
4018 *res++ = ' ';
4019 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004020 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004021 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004022 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004023 goto error;
4024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004025 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004026 } /* '%' */
4027 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004028 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004029 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004030 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004031 goto error;
4032 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004033 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004034 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004035 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004036 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004037 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004038
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004039#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004040 unicode:
4041 if (args_owned) {
4042 Py_DECREF(args);
4043 args_owned = 0;
4044 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004045 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004046 if (PyTuple_Check(orig_args) && argidx > 0) {
4047 PyObject *v;
4048 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4049 v = PyTuple_New(n);
4050 if (v == NULL)
4051 goto error;
4052 while (--n >= 0) {
4053 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4054 Py_INCREF(w);
4055 PyTuple_SET_ITEM(v, n, w);
4056 }
4057 args = v;
4058 } else {
4059 Py_INCREF(orig_args);
4060 args = orig_args;
4061 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004062 args_owned = 1;
4063 /* Take what we have of the result and let the Unicode formatting
4064 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004065 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004066 if (_PyString_Resize(&result, rescnt))
4067 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004068 fmtcnt = PyString_GET_SIZE(format) - \
4069 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004070 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4071 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004072 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004073 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004074 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004075 if (v == NULL)
4076 goto error;
4077 /* Paste what we have (result) to what the Unicode formatting
4078 function returned (v) and return the result (or error) */
4079 w = PyUnicode_Concat(result, v);
4080 Py_DECREF(result);
4081 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004082 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004083 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004084#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004085
Guido van Rossume5372401993-03-16 12:15:04 +00004086 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004087 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004088 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004089 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004090 }
Guido van Rossume5372401993-03-16 12:15:04 +00004091 return NULL;
4092}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004093
Guido van Rossum2a61e741997-01-18 07:55:05 +00004094void
Fred Drakeba096332000-07-09 07:04:36 +00004095PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004096{
4097 register PyStringObject *s = (PyStringObject *)(*p);
4098 PyObject *t;
4099 if (s == NULL || !PyString_Check(s))
4100 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004101 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004102 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004103 if (interned == NULL) {
4104 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004105 if (interned == NULL) {
4106 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004107 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004108 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004109 }
4110 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4111 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004112 Py_DECREF(*p);
4113 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004114 return;
4115 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004116 /* Ensure that only true string objects appear in the intern dict */
4117 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004118 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4119 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004120 if (t == NULL) {
4121 PyErr_Clear();
4122 return;
Tim Peters111f6092001-09-12 07:54:51 +00004123 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004124 } else {
4125 t = (PyObject*) s;
4126 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004127 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004128
4129 if (PyDict_SetItem(interned, t, t) == 0) {
4130 /* The two references in interned are not counted by
4131 refcnt. The string deallocator will take care of this */
4132 ((PyObject *)t)->ob_refcnt-=2;
4133 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4134 Py_DECREF(*p);
4135 *p = t;
4136 return;
4137 }
4138 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004139 PyErr_Clear();
4140}
4141
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004142void
4143PyString_InternImmortal(PyObject **p)
4144{
4145 PyString_InternInPlace(p);
4146 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4147 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4148 Py_INCREF(*p);
4149 }
4150}
4151
Guido van Rossum2a61e741997-01-18 07:55:05 +00004152
4153PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004154PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004155{
4156 PyObject *s = PyString_FromString(cp);
4157 if (s == NULL)
4158 return NULL;
4159 PyString_InternInPlace(&s);
4160 return s;
4161}
4162
Guido van Rossum8cf04761997-08-02 02:57:45 +00004163void
Fred Drakeba096332000-07-09 07:04:36 +00004164PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004165{
4166 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004167 for (i = 0; i < UCHAR_MAX + 1; i++) {
4168 Py_XDECREF(characters[i]);
4169 characters[i] = NULL;
4170 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004171 Py_XDECREF(nullstring);
4172 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004173}
Barry Warsawa903ad982001-02-23 16:40:48 +00004174
Barry Warsawa903ad982001-02-23 16:40:48 +00004175void _Py_ReleaseInternedStrings(void)
4176{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004177 PyObject *keys;
4178 PyStringObject *s;
4179 int i, n;
4180
4181 if (interned == NULL || !PyDict_Check(interned))
4182 return;
4183 keys = PyDict_Keys(interned);
4184 if (keys == NULL || !PyList_Check(keys)) {
4185 PyErr_Clear();
4186 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004187 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004188
4189 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4190 detector, interned strings are not forcibly deallocated; rather, we
4191 give them their stolen references back, and then clear and DECREF
4192 the interned dict. */
4193
4194 fprintf(stderr, "releasing interned strings\n");
4195 n = PyList_GET_SIZE(keys);
4196 for (i = 0; i < n; i++) {
4197 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4198 switch (s->ob_sstate) {
4199 case SSTATE_NOT_INTERNED:
4200 /* XXX Shouldn't happen */
4201 break;
4202 case SSTATE_INTERNED_IMMORTAL:
4203 s->ob_refcnt += 1;
4204 break;
4205 case SSTATE_INTERNED_MORTAL:
4206 s->ob_refcnt += 2;
4207 break;
4208 default:
4209 Py_FatalError("Inconsistent interned string state.");
4210 }
4211 s->ob_sstate = SSTATE_NOT_INTERNED;
4212 }
4213 Py_DECREF(keys);
4214 PyDict_Clear(interned);
4215 Py_DECREF(interned);
4216 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004217}