blob: 951205961755ecf01d92f53446a21ee9f36ac730 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is that to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
   For PyString_FromStringAndSize(), the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000536 int newlen = recode_encoding ? 4*len:len;
537 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000544 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545#ifdef Py_USING_UNICODE
546 if (recode_encoding && (*s & 0x80)) {
547 PyObject *u, *w;
548 char *r;
549 const char* t;
550 int rn;
551 t = s;
552 /* Decode non-ASCII bytes as UTF-8. */
553 while (t < end && (*t & 0x80)) t++;
554 u = PyUnicode_DecodeUTF8(s, t - s, errors);
555 if(!u) goto failed;
556
557 /* Recode them in target encoding. */
558 w = PyUnicode_AsEncodedString(
559 u, recode_encoding, errors);
560 Py_DECREF(u);
561 if (!w) goto failed;
562
563 /* Append bytes to output buffer. */
564 r = PyString_AsString(w);
565 rn = PyString_Size(w);
566 memcpy(p, r, rn);
567 p += rn;
568 Py_DECREF(w);
569 s = t;
570 } else {
571 *p++ = *s++;
572 }
573#else
574 *p++ = *s++;
575#endif
576 continue;
577 }
578 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000579 if (s==end) {
580 PyErr_SetString(PyExc_ValueError,
581 "Trailing \\ in string");
582 goto failed;
583 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000584 switch (*s++) {
585 /* XXX This assumes ASCII! */
586 case '\n': break;
587 case '\\': *p++ = '\\'; break;
588 case '\'': *p++ = '\''; break;
589 case '\"': *p++ = '\"'; break;
590 case 'b': *p++ = '\b'; break;
591 case 'f': *p++ = '\014'; break; /* FF */
592 case 't': *p++ = '\t'; break;
593 case 'n': *p++ = '\n'; break;
594 case 'r': *p++ = '\r'; break;
595 case 'v': *p++ = '\013'; break; /* VT */
596 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
597 case '0': case '1': case '2': case '3':
598 case '4': case '5': case '6': case '7':
599 c = s[-1] - '0';
600 if ('0' <= *s && *s <= '7') {
601 c = (c<<3) + *s++ - '0';
602 if ('0' <= *s && *s <= '7')
603 c = (c<<3) + *s++ - '0';
604 }
605 *p++ = c;
606 break;
607 case 'x':
608 if (isxdigit(Py_CHARMASK(s[0]))
609 && isxdigit(Py_CHARMASK(s[1]))) {
610 unsigned int x = 0;
611 c = Py_CHARMASK(*s);
612 s++;
613 if (isdigit(c))
614 x = c - '0';
615 else if (islower(c))
616 x = 10 + c - 'a';
617 else
618 x = 10 + c - 'A';
619 x = x << 4;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x += c - '0';
624 else if (islower(c))
625 x += 10 + c - 'a';
626 else
627 x += 10 + c - 'A';
628 *p++ = x;
629 break;
630 }
631 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000632 PyErr_SetString(PyExc_ValueError,
633 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000634 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000635 }
636 if (strcmp(errors, "replace") == 0) {
637 *p++ = '?';
638 } else if (strcmp(errors, "ignore") == 0)
639 /* do nothing */;
640 else {
641 PyErr_Format(PyExc_ValueError,
642 "decoding error; "
643 "unknown error handling code: %.400s",
644 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000645 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 }
647#ifndef Py_USING_UNICODE
648 case 'u':
649 case 'U':
650 case 'N':
651 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000652 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000653 "Unicode escapes not legal "
654 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657#endif
658 default:
659 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000660 s--;
661 goto non_esc; /* an arbitry number of unescaped
662 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000665 if (p-buf < newlen)
666 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 return v;
668 failed:
669 Py_DECREF(v);
670 return NULL;
671}
672
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000673static int
674string_getsize(register PyObject *op)
675{
676 char *s;
677 int len;
678 if (PyString_AsStringAndSize(op, &s, &len))
679 return -1;
680 return len;
681}
682
683static /*const*/ char *
684string_getbuffer(register PyObject *op)
685{
686 char *s;
687 int len;
688 if (PyString_AsStringAndSize(op, &s, &len))
689 return NULL;
690 return s;
691}
692
Guido van Rossumd7047b31995-01-02 19:07:15 +0000693int
Fred Drakeba096332000-07-09 07:04:36 +0000694PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696 if (!PyString_Check(op))
697 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000698 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699}
700
701/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000702PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (!PyString_Check(op))
705 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707}
708
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709int
710PyString_AsStringAndSize(register PyObject *obj,
711 register char **s,
712 register int *len)
713{
714 if (s == NULL) {
715 PyErr_BadInternalCall();
716 return -1;
717 }
718
719 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000720#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (PyUnicode_Check(obj)) {
722 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
723 if (obj == NULL)
724 return -1;
725 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000726 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000727#endif
728 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 PyErr_Format(PyExc_TypeError,
730 "expected string or Unicode object, "
731 "%.200s found", obj->ob_type->tp_name);
732 return -1;
733 }
734 }
735
736 *s = PyString_AS_STRING(obj);
737 if (len != NULL)
738 *len = PyString_GET_SIZE(obj);
739 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
740 PyErr_SetString(PyExc_TypeError,
741 "expected string without null bytes");
742 return -1;
743 }
744 return 0;
745}
746
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747/* Methods */
748
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000749static int
Fred Drakeba096332000-07-09 07:04:36 +0000750string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751{
752 int i;
753 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000754 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000755
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000756 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000757 if (! PyString_CheckExact(op)) {
758 int ret;
759 /* A str subclass may have its own __str__ method. */
760 op = (PyStringObject *) PyObject_Str((PyObject *)op);
761 if (op == NULL)
762 return -1;
763 ret = string_print(op, fp, flags);
764 Py_DECREF(op);
765 return ret;
766 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000768#ifdef __VMS
769 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
770#else
771 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
772#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000773 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000774 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775
Thomas Wouters7e474022000-07-16 12:04:32 +0000776 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000777 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000778 if (memchr(op->ob_sval, '\'', op->ob_size) &&
779 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 quote = '"';
781
782 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 for (i = 0; i < op->ob_size; i++) {
784 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000789 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000790 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000791 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fprintf(fp, "\\r");
793 else if (c < ' ' || c >= 0x7f)
794 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000795 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000796 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000798 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000799 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800}
801
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000802PyObject *
803PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000804{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000805 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000806 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
807 PyObject *v;
808 if (newsize > INT_MAX) {
809 PyErr_SetString(PyExc_OverflowError,
810 "string is too large to make repr");
811 }
812 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000814 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815 }
816 else {
817 register int i;
818 register char c;
819 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 int quote;
821
Thomas Wouters7e474022000-07-16 12:04:32 +0000822 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000824 if (smartquotes &&
825 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000826 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000827 quote = '"';
828
Tim Peters9161c8b2001-12-03 01:55:38 +0000829 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000832 /* There's at least enough room for a hex escape
833 and a closing quote. */
834 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
839 *p++ = '\\', *p++ = 't';
840 else if (c == '\n')
841 *p++ = '\\', *p++ = 'n';
842 else if (c == '\r')
843 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 else if (c < ' ' || c >= 0x7f) {
845 /* For performance, we don't want to call
846 PyOS_snprintf here (extra layers of
847 function call). */
848 sprintf(p, "\\x%02x", c & 0xff);
849 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000850 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000851 else
852 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000857 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000858 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000859 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000860 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864string_repr(PyObject *op)
865{
866 return PyString_Repr(op, 1);
867}
868
869static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000870string_str(PyObject *s)
871{
Tim Petersc9933152001-10-16 20:18:24 +0000872 assert(PyString_Check(s));
873 if (PyString_CheckExact(s)) {
874 Py_INCREF(s);
875 return s;
876 }
877 else {
878 /* Subtype -- return genuine string with the same value. */
879 PyStringObject *t = (PyStringObject *) s;
880 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
881 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000882}
883
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000884static int
Fred Drakeba096332000-07-09 07:04:36 +0000885string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886{
887 return a->ob_size;
888}
889
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000891string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892{
893 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000894 register PyStringObject *op;
895 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000896#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000897 if (PyUnicode_Check(bb))
898 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000899#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000900 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000901 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000902 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903 return NULL;
904 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000905#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000907 if ((a->ob_size == 0 || b->ob_size == 0) &&
908 PyString_CheckExact(a) && PyString_CheckExact(b)) {
909 if (a->ob_size == 0) {
910 Py_INCREF(bb);
911 return bb;
912 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 Py_INCREF(a);
914 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
916 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000917 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000918 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000919 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000920 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000922 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000923 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000924 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000925 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
926 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
927 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000929#undef b
930}
931
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000933string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934{
935 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000936 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000937 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000939 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000940 if (n < 0)
941 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000942 /* watch out for overflows: the size can overflow int,
943 * and the # of bytes needed can overflow size_t
944 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000946 if (n && size / n != a->ob_size) {
947 PyErr_SetString(PyExc_OverflowError,
948 "repeated string is too long");
949 return NULL;
950 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000951 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
Tim Peters8f422462000-09-09 06:13:41 +0000955 nbytes = size * sizeof(char);
956 if (nbytes / sizeof(char) != (size_t)size ||
957 nbytes + sizeof(PyStringObject) <= nbytes) {
958 PyErr_SetString(PyExc_OverflowError,
959 "repeated string is too long");
960 return NULL;
961 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000963 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000969 op->ob_sval[size] = '\0';
970 if (a->ob_size == 1 && n > 0) {
971 memset(op->ob_sval, a->ob_sval[0] , n);
972 return (PyObject *) op;
973 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000974 i = 0;
975 if (i < size) {
976 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
977 i = (int) a->ob_size;
978 }
979 while (i < size) {
980 j = (i <= size-i) ? i : size-i;
981 memcpy(op->ob_sval+i, op->ob_sval, j);
982 i += j;
983 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985}
986
987/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
988
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000990string_slice(register PyStringObject *a, register int i, register int j)
991 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992{
993 if (i < 0)
994 i = 0;
995 if (j < 0)
996 j = 0; /* Avoid signed/unsigned bug in next line */
997 if (j > a->ob_size)
998 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000999 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1000 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001001 Py_INCREF(a);
1002 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 }
1004 if (j < i)
1005 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001006 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007}
1008
Guido van Rossum9284a572000-03-07 15:53:43 +00001009static int
Fred Drakeba096332000-07-09 07:04:36 +00001010string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001011{
Barry Warsaw817918c2002-08-06 16:58:21 +00001012 const char *lhs, *rhs, *end;
1013 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014
1015 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001016#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 if (PyUnicode_Check(el))
1018 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001019#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001020 if (!PyString_Check(el)) {
1021 PyErr_SetString(PyExc_TypeError,
1022 "'in <string>' requires string as left operand");
1023 return -1;
1024 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001025 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001027 rhs = PyString_AS_STRING(el);
1028 lhs = PyString_AS_STRING(a);
1029
1030 /* optimize for a single character */
1031 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001032 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001033
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001034 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001035 while (lhs <= end) {
1036 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001037 return 1;
1038 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001039
Guido van Rossum9284a572000-03-07 15:53:43 +00001040 return 0;
1041}
1042
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001044string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001045{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001047 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050 return NULL;
1051 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001052 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001053 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001054 if (v == NULL)
1055 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001056 else {
1057#ifdef COUNT_ALLOCS
1058 one_strings++;
1059#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001060 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001061 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001062 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063}
1064
Martin v. Löwiscd353062001-05-24 16:56:35 +00001065static PyObject*
1066string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001068 int c;
1069 int len_a, len_b;
1070 int min_len;
1071 PyObject *result;
1072
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001073 /* Make sure both arguments are strings. */
1074 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001075 result = Py_NotImplemented;
1076 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001077 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001078 if (a == b) {
1079 switch (op) {
1080 case Py_EQ:case Py_LE:case Py_GE:
1081 result = Py_True;
1082 goto out;
1083 case Py_NE:case Py_LT:case Py_GT:
1084 result = Py_False;
1085 goto out;
1086 }
1087 }
1088 if (op == Py_EQ) {
1089 /* Supporting Py_NE here as well does not save
1090 much time, since Py_NE is rarely used. */
1091 if (a->ob_size == b->ob_size
1092 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001093 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094 a->ob_size) == 0)) {
1095 result = Py_True;
1096 } else {
1097 result = Py_False;
1098 }
1099 goto out;
1100 }
1101 len_a = a->ob_size; len_b = b->ob_size;
1102 min_len = (len_a < len_b) ? len_a : len_b;
1103 if (min_len > 0) {
1104 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1105 if (c==0)
1106 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1107 }else
1108 c = 0;
1109 if (c == 0)
1110 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1111 switch (op) {
1112 case Py_LT: c = c < 0; break;
1113 case Py_LE: c = c <= 0; break;
1114 case Py_EQ: assert(0); break; /* unreachable */
1115 case Py_NE: c = c != 0; break;
1116 case Py_GT: c = c > 0; break;
1117 case Py_GE: c = c >= 0; break;
1118 default:
1119 result = Py_NotImplemented;
1120 goto out;
1121 }
1122 result = c ? Py_True : Py_False;
1123 out:
1124 Py_INCREF(result);
1125 return result;
1126}
1127
1128int
1129_PyString_Eq(PyObject *o1, PyObject *o2)
1130{
1131 PyStringObject *a, *b;
1132 a = (PyStringObject*)o1;
1133 b = (PyStringObject*)o2;
1134 return a->ob_size == b->ob_size
1135 && *a->ob_sval == *b->ob_sval
1136 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001137}
1138
Guido van Rossum9bfef441993-03-29 10:43:31 +00001139static long
Fred Drakeba096332000-07-09 07:04:36 +00001140string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001141{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001142 register int len;
1143 register unsigned char *p;
1144 register long x;
1145
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001146 if (a->ob_shash != -1)
1147 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 len = a->ob_size;
1149 p = (unsigned char *) a->ob_sval;
1150 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001151 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001152 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001153 x ^= a->ob_size;
1154 if (x == -1)
1155 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001156 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001157 return x;
1158}
1159
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001160static PyObject*
1161string_subscript(PyStringObject* self, PyObject* item)
1162{
1163 if (PyInt_Check(item)) {
1164 long i = PyInt_AS_LONG(item);
1165 if (i < 0)
1166 i += PyString_GET_SIZE(self);
1167 return string_item(self,i);
1168 }
1169 else if (PyLong_Check(item)) {
1170 long i = PyLong_AsLong(item);
1171 if (i == -1 && PyErr_Occurred())
1172 return NULL;
1173 if (i < 0)
1174 i += PyString_GET_SIZE(self);
1175 return string_item(self,i);
1176 }
1177 else if (PySlice_Check(item)) {
1178 int start, stop, step, slicelength, cur, i;
1179 char* source_buf;
1180 char* result_buf;
1181 PyObject* result;
1182
1183 if (PySlice_GetIndicesEx((PySliceObject*)item,
1184 PyString_GET_SIZE(self),
1185 &start, &stop, &step, &slicelength) < 0) {
1186 return NULL;
1187 }
1188
1189 if (slicelength <= 0) {
1190 return PyString_FromStringAndSize("", 0);
1191 }
1192 else {
1193 source_buf = PyString_AsString((PyObject*)self);
1194 result_buf = PyMem_Malloc(slicelength);
1195
1196 for (cur = start, i = 0; i < slicelength;
1197 cur += step, i++) {
1198 result_buf[i] = source_buf[cur];
1199 }
1200
1201 result = PyString_FromStringAndSize(result_buf,
1202 slicelength);
1203 PyMem_Free(result_buf);
1204 return result;
1205 }
1206 }
1207 else {
1208 PyErr_SetString(PyExc_TypeError,
1209 "string indices must be integers");
1210 return NULL;
1211 }
1212}
1213
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001214static int
Fred Drakeba096332000-07-09 07:04:36 +00001215string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216{
1217 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001218 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001219 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220 return -1;
1221 }
1222 *ptr = (void *)self->ob_sval;
1223 return self->ob_size;
1224}
1225
1226static int
Fred Drakeba096332000-07-09 07:04:36 +00001227string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228{
Guido van Rossum045e6881997-09-08 18:30:11 +00001229 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001230 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001231 return -1;
1232}
1233
1234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236{
1237 if ( lenp )
1238 *lenp = self->ob_size;
1239 return 1;
1240}
1241
Guido van Rossum1db70701998-10-08 02:18:52 +00001242static int
Fred Drakeba096332000-07-09 07:04:36 +00001243string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001244{
1245 if ( index != 0 ) {
1246 PyErr_SetString(PyExc_SystemError,
1247 "accessing non-existent string segment");
1248 return -1;
1249 }
1250 *ptr = self->ob_sval;
1251 return self->ob_size;
1252}
1253
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001254static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001255 (inquiry)string_length, /*sq_length*/
1256 (binaryfunc)string_concat, /*sq_concat*/
1257 (intargfunc)string_repeat, /*sq_repeat*/
1258 (intargfunc)string_item, /*sq_item*/
1259 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001260 0, /*sq_ass_item*/
1261 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001262 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001263};
1264
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001265static PyMappingMethods string_as_mapping = {
1266 (inquiry)string_length,
1267 (binaryfunc)string_subscript,
1268 0,
1269};
1270
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001271static PyBufferProcs string_as_buffer = {
1272 (getreadbufferproc)string_buffer_getreadbuf,
1273 (getwritebufferproc)string_buffer_getwritebuf,
1274 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001275 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001276};
1277
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278
1279
/* Strip-mode selectors; each doubles as an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, one per strip mode, indexed by the values
   above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip mode: the format string with its three-byte
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001288
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289
1290static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001291split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001293 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001294 PyObject* item;
1295 PyObject *list = PyList_New(0);
1296
1297 if (list == NULL)
1298 return NULL;
1299
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 for (i = j = 0; i < len; ) {
1301 while (i < len && isspace(Py_CHARMASK(s[i])))
1302 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001304 while (i < len && !isspace(Py_CHARMASK(s[i])))
1305 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001307 if (maxsplit-- <= 0)
1308 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1310 if (item == NULL)
1311 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001312 err = PyList_Append(list, item);
1313 Py_DECREF(item);
1314 if (err < 0)
1315 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316 while (i < len && isspace(Py_CHARMASK(s[i])))
1317 i++;
1318 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319 }
1320 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321 if (j < len) {
1322 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1323 if (item == NULL)
1324 goto finally;
1325 err = PyList_Append(list, item);
1326 Py_DECREF(item);
1327 if (err < 0)
1328 goto finally;
1329 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 return list;
1331 finally:
1332 Py_DECREF(list);
1333 return NULL;
1334}
1335
1336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001337PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338"S.split([sep [,maxsplit]]) -> list of strings\n\
1339\n\
1340Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001342splits are done. If sep is not specified or is None, any\n\
1343whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344
1345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001346string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347{
1348 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 int maxsplit = -1;
1350 const char *s = PyString_AS_STRING(self), *sub;
1351 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 if (maxsplit < 0)
1356 maxsplit = INT_MAX;
1357 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 if (PyString_Check(subobj)) {
1360 sub = PyString_AS_STRING(subobj);
1361 n = PyString_GET_SIZE(subobj);
1362 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001363#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001364 else if (PyUnicode_Check(subobj))
1365 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001366#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1368 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369 if (n == 0) {
1370 PyErr_SetString(PyExc_ValueError, "empty separator");
1371 return NULL;
1372 }
1373
1374 list = PyList_New(0);
1375 if (list == NULL)
1376 return NULL;
1377
1378 i = j = 0;
1379 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001380 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001381 if (maxsplit-- <= 0)
1382 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1384 if (item == NULL)
1385 goto fail;
1386 err = PyList_Append(list, item);
1387 Py_DECREF(item);
1388 if (err < 0)
1389 goto fail;
1390 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 }
1392 else
1393 i++;
1394 }
1395 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1396 if (item == NULL)
1397 goto fail;
1398 err = PyList_Append(list, item);
1399 Py_DECREF(item);
1400 if (err < 0)
1401 goto fail;
1402
1403 return list;
1404
1405 fail:
1406 Py_DECREF(list);
1407 return NULL;
1408}
1409
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001410static PyObject *
1411rsplit_whitespace(const char *s, int len, int maxsplit)
1412{
1413 int i, j, err;
1414 PyObject* item;
1415 PyObject *list = PyList_New(0);
1416
1417 if (list == NULL)
1418 return NULL;
1419
1420 for (i = j = len - 1; i >= 0; ) {
1421 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1422 i--;
1423 j = i;
1424 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1425 i--;
1426 if (j > i) {
1427 if (maxsplit-- <= 0)
1428 break;
1429 item = PyString_FromStringAndSize(s+i+1, (int)(j-i));
1430 if (item == NULL)
1431 goto finally;
1432 err = PyList_Insert(list, 0, item);
1433 Py_DECREF(item);
1434 if (err < 0)
1435 goto finally;
1436 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1437 i--;
1438 j = i;
1439 }
1440 }
1441 if (j >= 0) {
1442 item = PyString_FromStringAndSize(s, (int)(j + 1));
1443 if (item == NULL)
1444 goto finally;
1445 err = PyList_Insert(list, 0, item);
1446 Py_DECREF(item);
1447 if (err < 0)
1448 goto finally;
1449 }
1450 return list;
1451 finally:
1452 Py_DECREF(list);
1453 return NULL;
1454}
1455
1456
1457PyDoc_STRVAR(rsplit__doc__,
1458"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1459\n\
1460Return a list of the words in the string S, using sep as the\n\
1461delimiter string, starting at the end of the string and working\n\
1462to the front. If maxsplit is given, at most maxsplit splits are\n\
1463done. If sep is not specified or is None, any whitespace string\n\
1464is a separator.");
1465
1466static PyObject *
1467string_rsplit(PyStringObject *self, PyObject *args)
1468{
1469 int len = PyString_GET_SIZE(self), n, i, j, err;
1470 int maxsplit = -1;
1471 const char *s = PyString_AS_STRING(self), *sub;
1472 PyObject *list, *item, *subobj = Py_None;
1473
1474 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1475 return NULL;
1476 if (maxsplit < 0)
1477 maxsplit = INT_MAX;
1478 if (subobj == Py_None)
1479 return rsplit_whitespace(s, len, maxsplit);
1480 if (PyString_Check(subobj)) {
1481 sub = PyString_AS_STRING(subobj);
1482 n = PyString_GET_SIZE(subobj);
1483 }
1484#ifdef Py_USING_UNICODE
1485 else if (PyUnicode_Check(subobj))
1486 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1487#endif
1488 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1489 return NULL;
1490 if (n == 0) {
1491 PyErr_SetString(PyExc_ValueError, "empty separator");
1492 return NULL;
1493 }
1494
1495 list = PyList_New(0);
1496 if (list == NULL)
1497 return NULL;
1498
1499 j = len;
1500 i = j - n;
1501 while (i >= 0) {
1502 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1503 if (maxsplit-- <= 0)
1504 break;
1505 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1506 if (item == NULL)
1507 goto fail;
1508 err = PyList_Insert(list, 0, item);
1509 Py_DECREF(item);
1510 if (err < 0)
1511 goto fail;
1512 j = i;
1513 i -= n;
1514 }
1515 else
1516 i--;
1517 }
1518 item = PyString_FromStringAndSize(s, j);
1519 if (item == NULL)
1520 goto fail;
1521 err = PyList_Insert(list, 0, item);
1522 Py_DECREF(item);
1523 if (err < 0)
1524 goto fail;
1525
1526 return list;
1527
1528 fail:
1529 Py_DECREF(list);
1530 return NULL;
1531}
1532
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001534PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001535"S.join(sequence) -> string\n\
1536\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001537Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001538sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001539
1540static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001541string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542{
1543 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001544 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546 char *p;
1547 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001548 size_t sz = 0;
1549 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001550 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551
Tim Peters19fe14e2001-01-19 03:03:47 +00001552 seq = PySequence_Fast(orig, "");
1553 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001554 if (PyErr_ExceptionMatches(PyExc_TypeError))
1555 PyErr_Format(PyExc_TypeError,
1556 "sequence expected, %.80s found",
1557 orig->ob_type->tp_name);
1558 return NULL;
1559 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001560
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001561 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001562 if (seqlen == 0) {
1563 Py_DECREF(seq);
1564 return PyString_FromString("");
1565 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001567 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001568 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1569 PyErr_Format(PyExc_TypeError,
1570 "sequence item 0: expected string,"
1571 " %.80s found",
1572 item->ob_type->tp_name);
1573 Py_DECREF(seq);
1574 return NULL;
1575 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001576 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001577 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001578 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001580
Tim Peters19fe14e2001-01-19 03:03:47 +00001581 /* There are at least two things to join. Do a pre-pass to figure out
1582 * the total amount of space we'll need (sz), see whether any argument
1583 * is absurd, and defer to the Unicode join if appropriate.
1584 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001585 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001586 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001587 item = PySequence_Fast_GET_ITEM(seq, i);
1588 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001589#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001590 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001591 /* Defer to Unicode join.
1592 * CAUTION: There's no gurantee that the
1593 * original sequence can be iterated over
1594 * again, so we must pass seq here.
1595 */
1596 PyObject *result;
1597 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001598 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001599 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001600 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001601#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001602 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001603 "sequence item %i: expected string,"
1604 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001605 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001606 Py_DECREF(seq);
1607 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001608 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001609 sz += PyString_GET_SIZE(item);
1610 if (i != 0)
1611 sz += seplen;
1612 if (sz < old_sz || sz > INT_MAX) {
1613 PyErr_SetString(PyExc_OverflowError,
1614 "join() is too long for a Python string");
1615 Py_DECREF(seq);
1616 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001618 }
1619
1620 /* Allocate result space. */
1621 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1622 if (res == NULL) {
1623 Py_DECREF(seq);
1624 return NULL;
1625 }
1626
1627 /* Catenate everything. */
1628 p = PyString_AS_STRING(res);
1629 for (i = 0; i < seqlen; ++i) {
1630 size_t n;
1631 item = PySequence_Fast_GET_ITEM(seq, i);
1632 n = PyString_GET_SIZE(item);
1633 memcpy(p, PyString_AS_STRING(item), n);
1634 p += n;
1635 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001636 memcpy(p, sep, seplen);
1637 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001640
Jeremy Hylton49048292000-07-11 03:28:17 +00001641 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643}
1644
Tim Peters52e155e2001-06-16 05:42:57 +00001645PyObject *
1646_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001647{
Tim Petersa7259592001-06-16 05:11:17 +00001648 assert(sep != NULL && PyString_Check(sep));
1649 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001650 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001651}
1652
/* Normalise a (start, end) pair in place against a string of length `len`.

   - `end` larger than `len` is clamped to `len`.
   - A negative `start` or `end` is taken relative to the end of the
     string (len is added); if it is still negative it becomes 0.
   - `start` is NOT clamped to `len`; callers must cope with start > end. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	/* Upper bound first: clamp high values, wrap negative ones. */
	*end = (*end > len) ? len : ((*end < 0) ? *end + len : *end);
	if (*end < 0)
		*end = 0;

	/* Lower bound: only negative values are touched. */
	*start = (*start < 0) ? *start + len : *start;
	if (*start < 0)
		*start = 0;
}
1667
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668static long
Fred Drakeba096332000-07-09 07:04:36 +00001669string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672 int len = PyString_GET_SIZE(self);
1673 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001676 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001677 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001678 return -2;
1679 if (PyString_Check(subobj)) {
1680 sub = PyString_AS_STRING(subobj);
1681 n = PyString_GET_SIZE(subobj);
1682 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001683#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001684 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001685 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001686#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001687 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688 return -2;
1689
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001690 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691
Guido van Rossum4c08d552000-03-10 22:55:18 +00001692 if (dir > 0) {
1693 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001695 last -= n;
1696 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001697 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698 return (long)i;
1699 }
1700 else {
1701 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001702
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703 if (n == 0 && i <= last)
1704 return (long)last;
1705 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001706 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001707 return (long)j;
1708 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001709
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710 return -1;
1711}
1712
1713
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001714PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715"S.find(sub [,start [,end]]) -> int\n\
1716\n\
1717Return the lowest index in S where substring sub is found,\n\
1718such that sub is contained within s[start,end]. Optional\n\
1719arguments start and end are interpreted as in slice notation.\n\
1720\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001721Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722
1723static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001724string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001726 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 if (result == -2)
1728 return NULL;
1729 return PyInt_FromLong(result);
1730}
1731
1732
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001733PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001734"S.index(sub [,start [,end]]) -> int\n\
1735\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001736Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737
1738static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001739string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001741 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 if (result == -2)
1743 return NULL;
1744 if (result == -1) {
1745 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001746 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 return NULL;
1748 }
1749 return PyInt_FromLong(result);
1750}
1751
1752
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001753PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754"S.rfind(sub [,start [,end]]) -> int\n\
1755\n\
1756Return the highest index in S where substring sub is found,\n\
1757such that sub is contained within s[start,end]. Optional\n\
1758arguments start and end are interpreted as in slice notation.\n\
1759\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001760Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761
1762static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001763string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001765 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766 if (result == -2)
1767 return NULL;
1768 return PyInt_FromLong(result);
1769}
1770
1771
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001772PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773"S.rindex(sub [,start [,end]]) -> int\n\
1774\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776
1777static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001778string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001780 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781 if (result == -2)
1782 return NULL;
1783 if (result == -1) {
1784 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001785 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786 return NULL;
1787 }
1788 return PyInt_FromLong(result);
1789}
1790
1791
1792static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001793do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1794{
1795 char *s = PyString_AS_STRING(self);
1796 int len = PyString_GET_SIZE(self);
1797 char *sep = PyString_AS_STRING(sepobj);
1798 int seplen = PyString_GET_SIZE(sepobj);
1799 int i, j;
1800
1801 i = 0;
1802 if (striptype != RIGHTSTRIP) {
1803 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1804 i++;
1805 }
1806 }
1807
1808 j = len;
1809 if (striptype != LEFTSTRIP) {
1810 do {
1811 j--;
1812 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1813 j++;
1814 }
1815
1816 if (i == 0 && j == len && PyString_CheckExact(self)) {
1817 Py_INCREF(self);
1818 return (PyObject*)self;
1819 }
1820 else
1821 return PyString_FromStringAndSize(s+i, j-i);
1822}
1823
1824
1825static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001826do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827{
1828 char *s = PyString_AS_STRING(self);
1829 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 i = 0;
1832 if (striptype != RIGHTSTRIP) {
1833 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1834 i++;
1835 }
1836 }
1837
1838 j = len;
1839 if (striptype != LEFTSTRIP) {
1840 do {
1841 j--;
1842 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1843 j++;
1844 }
1845
Tim Peters8fa5dd02001-09-12 02:18:30 +00001846 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 Py_INCREF(self);
1848 return (PyObject*)self;
1849 }
1850 else
1851 return PyString_FromStringAndSize(s+i, j-i);
1852}
1853
1854
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001855static PyObject *
1856do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1857{
1858 PyObject *sep = NULL;
1859
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001860 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001861 return NULL;
1862
1863 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001864 if (PyString_Check(sep))
1865 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001866#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001867 else if (PyUnicode_Check(sep)) {
1868 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1869 PyObject *res;
1870 if (uniself==NULL)
1871 return NULL;
1872 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1873 striptype, sep);
1874 Py_DECREF(uniself);
1875 return res;
1876 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001877#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001878 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001879 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001880#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001881 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001882#else
1883 "%s arg must be None or str",
1884#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001885 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001886 return NULL;
1887 }
1888 return do_xstrip(self, striptype, sep);
1889 }
1890
1891 return do_strip(self, striptype);
1892}
1893
1894
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001895PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001896"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897\n\
1898Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001899whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001900If chars is given and not None, remove characters in chars instead.\n\
1901If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001902
1903static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001904string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001906 if (PyTuple_GET_SIZE(args) == 0)
1907 return do_strip(self, BOTHSTRIP); /* Common case */
1908 else
1909 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910}
1911
1912
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001913PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001914"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001916Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001917If chars is given and not None, remove characters in chars instead.\n\
1918If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001919
1920static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001921string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001923 if (PyTuple_GET_SIZE(args) == 0)
1924 return do_strip(self, LEFTSTRIP); /* Common case */
1925 else
1926 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927}
1928
1929
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001930PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001931"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001933Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001934If chars is given and not None, remove characters in chars instead.\n\
1935If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936
1937static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001938string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001940 if (PyTuple_GET_SIZE(args) == 0)
1941 return do_strip(self, RIGHTSTRIP); /* Common case */
1942 else
1943 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944}
1945
1946
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001947PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948"S.lower() -> string\n\
1949\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001950Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951
1952static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001953string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954{
1955 char *s = PyString_AS_STRING(self), *s_new;
1956 int i, n = PyString_GET_SIZE(self);
1957 PyObject *new;
1958
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 new = PyString_FromStringAndSize(NULL, n);
1960 if (new == NULL)
1961 return NULL;
1962 s_new = PyString_AsString(new);
1963 for (i = 0; i < n; i++) {
1964 int c = Py_CHARMASK(*s++);
1965 if (isupper(c)) {
1966 *s_new = tolower(c);
1967 } else
1968 *s_new = c;
1969 s_new++;
1970 }
1971 return new;
1972}
1973
1974
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001975PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976"S.upper() -> string\n\
1977\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001978Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979
1980static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001981string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982{
1983 char *s = PyString_AS_STRING(self), *s_new;
1984 int i, n = PyString_GET_SIZE(self);
1985 PyObject *new;
1986
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987 new = PyString_FromStringAndSize(NULL, n);
1988 if (new == NULL)
1989 return NULL;
1990 s_new = PyString_AsString(new);
1991 for (i = 0; i < n; i++) {
1992 int c = Py_CHARMASK(*s++);
1993 if (islower(c)) {
1994 *s_new = toupper(c);
1995 } else
1996 *s_new = c;
1997 s_new++;
1998 }
1999 return new;
2000}
2001
2002
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002003PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002004"S.title() -> string\n\
2005\n\
2006Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002007characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002008
2009static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002010string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011{
2012 char *s = PyString_AS_STRING(self), *s_new;
2013 int i, n = PyString_GET_SIZE(self);
2014 int previous_is_cased = 0;
2015 PyObject *new;
2016
Guido van Rossum4c08d552000-03-10 22:55:18 +00002017 new = PyString_FromStringAndSize(NULL, n);
2018 if (new == NULL)
2019 return NULL;
2020 s_new = PyString_AsString(new);
2021 for (i = 0; i < n; i++) {
2022 int c = Py_CHARMASK(*s++);
2023 if (islower(c)) {
2024 if (!previous_is_cased)
2025 c = toupper(c);
2026 previous_is_cased = 1;
2027 } else if (isupper(c)) {
2028 if (previous_is_cased)
2029 c = tolower(c);
2030 previous_is_cased = 1;
2031 } else
2032 previous_is_cased = 0;
2033 *s_new++ = c;
2034 }
2035 return new;
2036}
2037
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002038PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039"S.capitalize() -> string\n\
2040\n\
2041Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002042capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043
2044static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002045string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046{
2047 char *s = PyString_AS_STRING(self), *s_new;
2048 int i, n = PyString_GET_SIZE(self);
2049 PyObject *new;
2050
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051 new = PyString_FromStringAndSize(NULL, n);
2052 if (new == NULL)
2053 return NULL;
2054 s_new = PyString_AsString(new);
2055 if (0 < n) {
2056 int c = Py_CHARMASK(*s++);
2057 if (islower(c))
2058 *s_new = toupper(c);
2059 else
2060 *s_new = c;
2061 s_new++;
2062 }
2063 for (i = 1; i < n; i++) {
2064 int c = Py_CHARMASK(*s++);
2065 if (isupper(c))
2066 *s_new = tolower(c);
2067 else
2068 *s_new = c;
2069 s_new++;
2070 }
2071 return new;
2072}
2073
2074
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002075PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076"S.count(sub[, start[, end]]) -> int\n\
2077\n\
2078Return the number of occurrences of substring sub in string\n\
2079S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002080interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081
2082static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002083string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086 int len = PyString_GET_SIZE(self), n;
2087 int i = 0, last = INT_MAX;
2088 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090
Guido van Rossumc6821402000-05-08 14:08:05 +00002091 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2092 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002094
Guido van Rossum4c08d552000-03-10 22:55:18 +00002095 if (PyString_Check(subobj)) {
2096 sub = PyString_AS_STRING(subobj);
2097 n = PyString_GET_SIZE(subobj);
2098 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002099#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002100 else if (PyUnicode_Check(subobj)) {
2101 int count;
2102 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2103 if (count == -1)
2104 return NULL;
2105 else
2106 return PyInt_FromLong((long) count);
2107 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002108#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2110 return NULL;
2111
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002112 string_adjust_indices(&i, &last, len);
2113
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114 m = last + 1 - n;
2115 if (n == 0)
2116 return PyInt_FromLong((long) (m-i));
2117
2118 r = 0;
2119 while (i < m) {
2120 if (!memcmp(s+i, sub, n)) {
2121 r++;
2122 i += n;
2123 } else {
2124 i++;
2125 }
2126 }
2127 return PyInt_FromLong((long) r);
2128}
2129
2130
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002131PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132"S.swapcase() -> string\n\
2133\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002135converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136
2137static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002138string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139{
2140 char *s = PyString_AS_STRING(self), *s_new;
2141 int i, n = PyString_GET_SIZE(self);
2142 PyObject *new;
2143
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002144 new = PyString_FromStringAndSize(NULL, n);
2145 if (new == NULL)
2146 return NULL;
2147 s_new = PyString_AsString(new);
2148 for (i = 0; i < n; i++) {
2149 int c = Py_CHARMASK(*s++);
2150 if (islower(c)) {
2151 *s_new = toupper(c);
2152 }
2153 else if (isupper(c)) {
2154 *s_new = tolower(c);
2155 }
2156 else
2157 *s_new = c;
2158 s_new++;
2159 }
2160 return new;
2161}
2162
2163
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002164PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165"S.translate(table [,deletechars]) -> string\n\
2166\n\
2167Return a copy of the string S, where all characters occurring\n\
2168in the optional argument deletechars are removed, and the\n\
2169remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002170translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171
2172static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002173string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175 register char *input, *output;
2176 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177 register int i, c, changed = 0;
2178 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180 int inlen, tablen, dellen = 0;
2181 PyObject *result;
2182 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002185 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188
2189 if (PyString_Check(tableobj)) {
2190 table1 = PyString_AS_STRING(tableobj);
2191 tablen = PyString_GET_SIZE(tableobj);
2192 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002193#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002195 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196 parameter; instead a mapping to None will cause characters
2197 to be deleted. */
2198 if (delobj != NULL) {
2199 PyErr_SetString(PyExc_TypeError,
2200 "deletions are implemented differently for unicode");
2201 return NULL;
2202 }
2203 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2204 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002205#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002206 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002208
Martin v. Löwis00b61272002-12-12 20:03:19 +00002209 if (tablen != 256) {
2210 PyErr_SetString(PyExc_ValueError,
2211 "translation table must be 256 characters long");
2212 return NULL;
2213 }
2214
Guido van Rossum4c08d552000-03-10 22:55:18 +00002215 if (delobj != NULL) {
2216 if (PyString_Check(delobj)) {
2217 del_table = PyString_AS_STRING(delobj);
2218 dellen = PyString_GET_SIZE(delobj);
2219 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002220#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221 else if (PyUnicode_Check(delobj)) {
2222 PyErr_SetString(PyExc_TypeError,
2223 "deletions are implemented differently for unicode");
2224 return NULL;
2225 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002226#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002227 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2228 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002229 }
2230 else {
2231 del_table = NULL;
2232 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233 }
2234
2235 table = table1;
2236 inlen = PyString_Size(input_obj);
2237 result = PyString_FromStringAndSize((char *)NULL, inlen);
2238 if (result == NULL)
2239 return NULL;
2240 output_start = output = PyString_AsString(result);
2241 input = PyString_AsString(input_obj);
2242
2243 if (dellen == 0) {
2244 /* If no deletions are required, use faster code */
2245 for (i = inlen; --i >= 0; ) {
2246 c = Py_CHARMASK(*input++);
2247 if (Py_CHARMASK((*output++ = table[c])) != c)
2248 changed = 1;
2249 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002250 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251 return result;
2252 Py_DECREF(result);
2253 Py_INCREF(input_obj);
2254 return input_obj;
2255 }
2256
2257 for (i = 0; i < 256; i++)
2258 trans_table[i] = Py_CHARMASK(table[i]);
2259
2260 for (i = 0; i < dellen; i++)
2261 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2262
2263 for (i = inlen; --i >= 0; ) {
2264 c = Py_CHARMASK(*input++);
2265 if (trans_table[c] != -1)
2266 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2267 continue;
2268 changed = 1;
2269 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002270 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271 Py_DECREF(result);
2272 Py_INCREF(input_obj);
2273 return input_obj;
2274 }
2275 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002276 if (inlen > 0)
2277 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278 return result;
2279}
2280
2281
2282/* What follows is used for implementing replace(). Perry Stoll. */
2283
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1 if
  there is none.  If PAT_LEN is greater than LEN, the function returns -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* pattern can not start in the last pat_len-1 chars */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
2309
/*
   mymemcnt

   Return the number of non-overlapping times PAT is found in MEM,
   scanning left to right:
   mem=1111 and pat==11 returns 2.
   mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		int at = mymemfind(mem, len, pat, pat_len);

		if (at == -1)
			break;
		/* Resume the search right after this match. */
		mem += at + pat_len;
		len -= at + pat_len;
	}
	return hits;
}
2333
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "replace all".  An empty PAT matches before every byte of
   STR and after the last one.

   The original string is returned (no allocation) if STR is empty, if
   PAT and SUB are both empty, if the length of PAT is greater than the
   length of STR, or if no replacement is to be made.  Otherwise, a new
   buffer is allocated here with PyMem_MALLOC and returned; the result is
   NOT NUL-terminated unless it is empty.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growth per replacement.  Only a positive delta can overflow:
	   when the result shrinks, nfound*pat_len <= len bounds it. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta) {
		/* Result would exceed INT_MAX bytes; report it the same way
		   as an allocation failure instead of under-allocating. */
		return NULL;
	}
	new_len = len + nfound * delta;

	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* Empty pattern: emit SUB, then one input byte,
			   repeating until the replacement budget is used up;
			   the rest of the input is copied verbatim. */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2431
2432
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002433PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002434"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002435\n\
2436Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002437old replaced by new. If the optional argument count is\n\
2438given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439
2440static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002441string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002442{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002443 const char *str = PyString_AS_STRING(self), *sub, *repl;
2444 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002445 const int len = PyString_GET_SIZE(self);
2446 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002447 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002448 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002450
Guido van Rossum4c08d552000-03-10 22:55:18 +00002451 if (!PyArg_ParseTuple(args, "OO|i:replace",
2452 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002453 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002454
2455 if (PyString_Check(subobj)) {
2456 sub = PyString_AS_STRING(subobj);
2457 sub_len = PyString_GET_SIZE(subobj);
2458 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002459#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002460 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002461 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002462 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002463#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002464 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2465 return NULL;
2466
2467 if (PyString_Check(replobj)) {
2468 repl = PyString_AS_STRING(replobj);
2469 repl_len = PyString_GET_SIZE(replobj);
2470 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002471#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002472 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002473 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002474 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002475#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002476 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2477 return NULL;
2478
Guido van Rossum4c08d552000-03-10 22:55:18 +00002479 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480 if (new_s == NULL) {
2481 PyErr_NoMemory();
2482 return NULL;
2483 }
2484 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002485 if (PyString_CheckExact(self)) {
2486 /* we're returning another reference to self */
2487 new = (PyObject*)self;
2488 Py_INCREF(new);
2489 }
2490 else {
2491 new = PyString_FromStringAndSize(str, len);
2492 if (new == NULL)
2493 return NULL;
2494 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002495 }
2496 else {
2497 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002498 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002499 }
2500 return new;
2501}
2502
2503
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002504PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002505"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002507Return True if S starts with the specified prefix, False otherwise.\n\
2508With optional start, test S beginning at that position.\n\
2509With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002510
2511static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002512string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002513{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002514 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002515 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002516 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002517 int plen;
2518 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002519 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521
Guido van Rossumc6821402000-05-08 14:08:05 +00002522 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2523 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002524 return NULL;
2525 if (PyString_Check(subobj)) {
2526 prefix = PyString_AS_STRING(subobj);
2527 plen = PyString_GET_SIZE(subobj);
2528 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002529#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002530 else if (PyUnicode_Check(subobj)) {
2531 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002532 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002533 subobj, start, end, -1);
2534 if (rc == -1)
2535 return NULL;
2536 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002537 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002538 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002539#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002540 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002541 return NULL;
2542
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002543 string_adjust_indices(&start, &end, len);
2544
2545 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002546 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002548 if (end-start >= plen)
2549 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2550 else
2551 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002552}
2553
2554
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002555PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002556"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002557\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002558Return True if S ends with the specified suffix, False otherwise.\n\
2559With optional start, test S beginning at that position.\n\
2560With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002561
2562static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002563string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002564{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002566 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567 const char* suffix;
2568 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002569 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002570 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002571 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002572
Guido van Rossumc6821402000-05-08 14:08:05 +00002573 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2574 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002575 return NULL;
2576 if (PyString_Check(subobj)) {
2577 suffix = PyString_AS_STRING(subobj);
2578 slen = PyString_GET_SIZE(subobj);
2579 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002580#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002581 else if (PyUnicode_Check(subobj)) {
2582 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002583 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002584 subobj, start, end, +1);
2585 if (rc == -1)
2586 return NULL;
2587 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002588 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002589 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002590#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002591 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002592 return NULL;
2593
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002594 string_adjust_indices(&start, &end, len);
2595
2596 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002597 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002598
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002599 if (end-slen > start)
2600 start = end - slen;
2601 if (end-start >= slen)
2602 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2603 else
2604 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002605}
2606
2607
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002608PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002609"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002610\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002611Encodes S using the codec registered for encoding. encoding defaults\n\
2612to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002613handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002614a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2615'xmlcharrefreplace' as well as any other name registered with\n\
2616codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002617
2618static PyObject *
2619string_encode(PyStringObject *self, PyObject *args)
2620{
2621 char *encoding = NULL;
2622 char *errors = NULL;
2623 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2624 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002625 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2626}
2627
2628
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002629PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002630"S.decode([encoding[,errors]]) -> object\n\
2631\n\
2632Decodes S using the codec registered for encoding. encoding defaults\n\
2633to the default encoding. errors may be given to set a different error\n\
2634handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002635a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2636as well as any other name registerd with codecs.register_error that is\n\
2637able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002638
2639static PyObject *
2640string_decode(PyStringObject *self, PyObject *args)
2641{
2642 char *encoding = NULL;
2643 char *errors = NULL;
2644 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2645 return NULL;
2646 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002647}
2648
2649
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002650PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002651"S.expandtabs([tabsize]) -> string\n\
2652\n\
2653Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002654If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655
2656static PyObject*
2657string_expandtabs(PyStringObject *self, PyObject *args)
2658{
2659 const char *e, *p;
2660 char *q;
2661 int i, j;
2662 PyObject *u;
2663 int tabsize = 8;
2664
2665 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2666 return NULL;
2667
Thomas Wouters7e474022000-07-16 12:04:32 +00002668 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669 i = j = 0;
2670 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2671 for (p = PyString_AS_STRING(self); p < e; p++)
2672 if (*p == '\t') {
2673 if (tabsize > 0)
2674 j += tabsize - (j % tabsize);
2675 }
2676 else {
2677 j++;
2678 if (*p == '\n' || *p == '\r') {
2679 i += j;
2680 j = 0;
2681 }
2682 }
2683
2684 /* Second pass: create output string and fill it */
2685 u = PyString_FromStringAndSize(NULL, i + j);
2686 if (!u)
2687 return NULL;
2688
2689 j = 0;
2690 q = PyString_AS_STRING(u);
2691
2692 for (p = PyString_AS_STRING(self); p < e; p++)
2693 if (*p == '\t') {
2694 if (tabsize > 0) {
2695 i = tabsize - (j % tabsize);
2696 j += i;
2697 while (i--)
2698 *q++ = ' ';
2699 }
2700 }
2701 else {
2702 j++;
2703 *q++ = *p;
2704 if (*p == '\n' || *p == '\r')
2705 j = 0;
2706 }
2707
2708 return u;
2709}
2710
Tim Peters8fa5dd02001-09-12 02:18:30 +00002711static PyObject *
2712pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002713{
2714 PyObject *u;
2715
2716 if (left < 0)
2717 left = 0;
2718 if (right < 0)
2719 right = 0;
2720
Tim Peters8fa5dd02001-09-12 02:18:30 +00002721 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002722 Py_INCREF(self);
2723 return (PyObject *)self;
2724 }
2725
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002726 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002727 left + PyString_GET_SIZE(self) + right);
2728 if (u) {
2729 if (left)
2730 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002731 memcpy(PyString_AS_STRING(u) + left,
2732 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002733 PyString_GET_SIZE(self));
2734 if (right)
2735 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2736 fill, right);
2737 }
2738
2739 return u;
2740}
2741
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002742PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002743"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002744"\n"
2745"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002746"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002747
2748static PyObject *
2749string_ljust(PyStringObject *self, PyObject *args)
2750{
2751 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002752 char fillchar = ' ';
2753
2754 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002755 return NULL;
2756
Tim Peters8fa5dd02001-09-12 02:18:30 +00002757 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002758 Py_INCREF(self);
2759 return (PyObject*) self;
2760 }
2761
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002762 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002763}
2764
2765
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002766PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002767"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002768"\n"
2769"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002770"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002771
2772static PyObject *
2773string_rjust(PyStringObject *self, PyObject *args)
2774{
2775 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002776 char fillchar = ' ';
2777
2778 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002779 return NULL;
2780
Tim Peters8fa5dd02001-09-12 02:18:30 +00002781 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002782 Py_INCREF(self);
2783 return (PyObject*) self;
2784 }
2785
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002786 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002787}
2788
2789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002790PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002791"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002792"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002793"Return S centered in a string of length width. Padding is\n"
2794"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002795
2796static PyObject *
2797string_center(PyStringObject *self, PyObject *args)
2798{
2799 int marg, left;
2800 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002801 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002802
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002803 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002804 return NULL;
2805
Tim Peters8fa5dd02001-09-12 02:18:30 +00002806 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002807 Py_INCREF(self);
2808 return (PyObject*) self;
2809 }
2810
2811 marg = width - PyString_GET_SIZE(self);
2812 left = marg / 2 + (marg & width & 1);
2813
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002814 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002815}
2816
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002817PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002818"S.zfill(width) -> string\n"
2819"\n"
2820"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002821"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002822
2823static PyObject *
2824string_zfill(PyStringObject *self, PyObject *args)
2825{
2826 int fill;
2827 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002828 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002829
2830 int width;
2831 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2832 return NULL;
2833
2834 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002835 if (PyString_CheckExact(self)) {
2836 Py_INCREF(self);
2837 return (PyObject*) self;
2838 }
2839 else
2840 return PyString_FromStringAndSize(
2841 PyString_AS_STRING(self),
2842 PyString_GET_SIZE(self)
2843 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002844 }
2845
2846 fill = width - PyString_GET_SIZE(self);
2847
2848 s = pad(self, fill, 0, '0');
2849
2850 if (s == NULL)
2851 return NULL;
2852
2853 p = PyString_AS_STRING(s);
2854 if (p[fill] == '+' || p[fill] == '-') {
2855 /* move sign to beginning of string */
2856 p[0] = p[fill];
2857 p[fill] = '0';
2858 }
2859
2860 return (PyObject*) s;
2861}
2862
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002863PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002864"S.isspace() -> bool\n\
2865\n\
2866Return True if all characters in S are whitespace\n\
2867and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002868
2869static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002870string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871{
Fred Drakeba096332000-07-09 07:04:36 +00002872 register const unsigned char *p
2873 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002874 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002875
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876 /* Shortcut for single character strings */
2877 if (PyString_GET_SIZE(self) == 1 &&
2878 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002879 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002880
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002881 /* Special case for empty strings */
2882 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002883 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002884
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885 e = p + PyString_GET_SIZE(self);
2886 for (; p < e; p++) {
2887 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002888 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002889 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002890 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002891}
2892
2893
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002894PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002895"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002896\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002897Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002898and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002899
2900static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002901string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002902{
Fred Drakeba096332000-07-09 07:04:36 +00002903 register const unsigned char *p
2904 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002905 register const unsigned char *e;
2906
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002907 /* Shortcut for single character strings */
2908 if (PyString_GET_SIZE(self) == 1 &&
2909 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002910 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002911
2912 /* Special case for empty strings */
2913 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002914 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002915
2916 e = p + PyString_GET_SIZE(self);
2917 for (; p < e; p++) {
2918 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002919 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002920 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002921 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002922}
2923
2924
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002925PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002926"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002927\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002928Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002929and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002930
2931static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002932string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002933{
Fred Drakeba096332000-07-09 07:04:36 +00002934 register const unsigned char *p
2935 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002936 register const unsigned char *e;
2937
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002938 /* Shortcut for single character strings */
2939 if (PyString_GET_SIZE(self) == 1 &&
2940 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002941 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002942
2943 /* Special case for empty strings */
2944 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002945 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002946
2947 e = p + PyString_GET_SIZE(self);
2948 for (; p < e; p++) {
2949 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002950 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002951 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002952 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002953}
2954
2955
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002956PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002957"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002958\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002959Return True if all characters in S are digits\n\
2960and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002961
2962static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002963string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002964{
Fred Drakeba096332000-07-09 07:04:36 +00002965 register const unsigned char *p
2966 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002967 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002968
Guido van Rossum4c08d552000-03-10 22:55:18 +00002969 /* Shortcut for single character strings */
2970 if (PyString_GET_SIZE(self) == 1 &&
2971 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002972 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002973
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002974 /* Special case for empty strings */
2975 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002976 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002977
Guido van Rossum4c08d552000-03-10 22:55:18 +00002978 e = p + PyString_GET_SIZE(self);
2979 for (; p < e; p++) {
2980 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002981 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002982 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002983 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002984}
2985
2986
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002987PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002988"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002989\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002990Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002991at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002992
2993static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002994string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002995{
Fred Drakeba096332000-07-09 07:04:36 +00002996 register const unsigned char *p
2997 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002998 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002999 int cased;
3000
Guido van Rossum4c08d552000-03-10 22:55:18 +00003001 /* Shortcut for single character strings */
3002 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003003 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003004
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003005 /* Special case for empty strings */
3006 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003007 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003008
Guido van Rossum4c08d552000-03-10 22:55:18 +00003009 e = p + PyString_GET_SIZE(self);
3010 cased = 0;
3011 for (; p < e; p++) {
3012 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003013 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003014 else if (!cased && islower(*p))
3015 cased = 1;
3016 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003017 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003018}
3019
3020
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003021PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003022"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003023\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003024Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003025at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003026
3027static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003028string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003029{
Fred Drakeba096332000-07-09 07:04:36 +00003030 register const unsigned char *p
3031 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003032 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003033 int cased;
3034
Guido van Rossum4c08d552000-03-10 22:55:18 +00003035 /* Shortcut for single character strings */
3036 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003037 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003038
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003039 /* Special case for empty strings */
3040 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003041 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003042
Guido van Rossum4c08d552000-03-10 22:55:18 +00003043 e = p + PyString_GET_SIZE(self);
3044 cased = 0;
3045 for (; p < e; p++) {
3046 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003047 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003048 else if (!cased && isupper(*p))
3049 cased = 1;
3050 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003051 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003052}
3053
3054
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003055PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003056"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003057\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003058Return True if S is a titlecased string and there is at least one\n\
3059character in S, i.e. uppercase characters may only follow uncased\n\
3060characters and lowercase characters only cased ones. Return False\n\
3061otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003062
3063static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003064string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003065{
Fred Drakeba096332000-07-09 07:04:36 +00003066 register const unsigned char *p
3067 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003068 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069 int cased, previous_is_cased;
3070
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071 /* Shortcut for single character strings */
3072 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003073 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003074
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003075 /* Special case for empty strings */
3076 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003077 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003078
Guido van Rossum4c08d552000-03-10 22:55:18 +00003079 e = p + PyString_GET_SIZE(self);
3080 cased = 0;
3081 previous_is_cased = 0;
3082 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003083 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084
3085 if (isupper(ch)) {
3086 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003087 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003088 previous_is_cased = 1;
3089 cased = 1;
3090 }
3091 else if (islower(ch)) {
3092 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003093 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003094 previous_is_cased = 1;
3095 cased = 1;
3096 }
3097 else
3098 previous_is_cased = 0;
3099 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003100 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101}
3102
3103
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003104PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003105"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003106\n\
3107Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003108Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003109is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003110
3111#define SPLIT_APPEND(data, left, right) \
3112 str = PyString_FromStringAndSize(data + left, right - left); \
3113 if (!str) \
3114 goto onError; \
3115 if (PyList_Append(list, str)) { \
3116 Py_DECREF(str); \
3117 goto onError; \
3118 } \
3119 else \
3120 Py_DECREF(str);
3121
3122static PyObject*
3123string_splitlines(PyStringObject *self, PyObject *args)
3124{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003125 register int i;
3126 register int j;
3127 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003128 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003129 PyObject *list;
3130 PyObject *str;
3131 char *data;
3132
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003133 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003134 return NULL;
3135
3136 data = PyString_AS_STRING(self);
3137 len = PyString_GET_SIZE(self);
3138
Guido van Rossum4c08d552000-03-10 22:55:18 +00003139 list = PyList_New(0);
3140 if (!list)
3141 goto onError;
3142
3143 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003144 int eol;
3145
Guido van Rossum4c08d552000-03-10 22:55:18 +00003146 /* Find a line and append it */
3147 while (i < len && data[i] != '\n' && data[i] != '\r')
3148 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003149
3150 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003151 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003152 if (i < len) {
3153 if (data[i] == '\r' && i + 1 < len &&
3154 data[i+1] == '\n')
3155 i += 2;
3156 else
3157 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003158 if (keepends)
3159 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003161 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003162 j = i;
3163 }
3164 if (j < len) {
3165 SPLIT_APPEND(data, j, len);
3166 }
3167
3168 return list;
3169
3170 onError:
3171 Py_DECREF(list);
3172 return NULL;
3173}
3174
3175#undef SPLIT_APPEND
3176
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003177static PyObject *
3178string_getnewargs(PyStringObject *v)
3179{
3180 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3181}
3182
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003183
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003184static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003185string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186 /* Counterparts of the obsolete stropmodule functions; except
3187 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003188 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3189 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003190 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003191 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3192 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003193 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3194 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3195 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3196 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3197 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3198 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3199 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003200 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3201 capitalize__doc__},
3202 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3203 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3204 endswith__doc__},
3205 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3206 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3207 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3208 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3209 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3210 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3211 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3212 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3213 startswith__doc__},
3214 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3215 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3216 swapcase__doc__},
3217 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3218 translate__doc__},
3219 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3220 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3221 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3222 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3223 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3224 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3225 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3226 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3227 expandtabs__doc__},
3228 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3229 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003230 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003231 {NULL, NULL} /* sentinel */
3232};
3233
Jeremy Hylton938ace62002-07-17 16:30:39 +00003234static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003235str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3236
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003237static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003238string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003239{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003240 PyObject *x = NULL;
3241 static char *kwlist[] = {"object", 0};
3242
Guido van Rossumae960af2001-08-30 03:11:59 +00003243 if (type != &PyString_Type)
3244 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003245 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3246 return NULL;
3247 if (x == NULL)
3248 return PyString_FromString("");
3249 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003250}
3251
Guido van Rossumae960af2001-08-30 03:11:59 +00003252static PyObject *
3253str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3254{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003255 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003256 int n;
3257
3258 assert(PyType_IsSubtype(type, &PyString_Type));
3259 tmp = string_new(&PyString_Type, args, kwds);
3260 if (tmp == NULL)
3261 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003262 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003263 n = PyString_GET_SIZE(tmp);
3264 pnew = type->tp_alloc(type, n);
3265 if (pnew != NULL) {
3266 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003267 ((PyStringObject *)pnew)->ob_shash =
3268 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003269 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003270 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003271 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003272 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003273}
3274
Guido van Rossumcacfc072002-05-24 19:01:59 +00003275static PyObject *
3276basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3277{
3278 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003279 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003280 return NULL;
3281}
3282
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003283static PyObject *
3284string_mod(PyObject *v, PyObject *w)
3285{
3286 if (!PyString_Check(v)) {
3287 Py_INCREF(Py_NotImplemented);
3288 return Py_NotImplemented;
3289 }
3290 return PyString_Format(v, w);
3291}
3292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003293PyDoc_STRVAR(basestring_doc,
3294"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003295
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003296static PyNumberMethods string_as_number = {
3297 0, /*nb_add*/
3298 0, /*nb_subtract*/
3299 0, /*nb_multiply*/
3300 0, /*nb_divide*/
3301 string_mod, /*nb_remainder*/
3302};
3303
3304
Guido van Rossumcacfc072002-05-24 19:01:59 +00003305PyTypeObject PyBaseString_Type = {
3306 PyObject_HEAD_INIT(&PyType_Type)
3307 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003308 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003309 0,
3310 0,
3311 0, /* tp_dealloc */
3312 0, /* tp_print */
3313 0, /* tp_getattr */
3314 0, /* tp_setattr */
3315 0, /* tp_compare */
3316 0, /* tp_repr */
3317 0, /* tp_as_number */
3318 0, /* tp_as_sequence */
3319 0, /* tp_as_mapping */
3320 0, /* tp_hash */
3321 0, /* tp_call */
3322 0, /* tp_str */
3323 0, /* tp_getattro */
3324 0, /* tp_setattro */
3325 0, /* tp_as_buffer */
3326 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3327 basestring_doc, /* tp_doc */
3328 0, /* tp_traverse */
3329 0, /* tp_clear */
3330 0, /* tp_richcompare */
3331 0, /* tp_weaklistoffset */
3332 0, /* tp_iter */
3333 0, /* tp_iternext */
3334 0, /* tp_methods */
3335 0, /* tp_members */
3336 0, /* tp_getset */
3337 &PyBaseObject_Type, /* tp_base */
3338 0, /* tp_dict */
3339 0, /* tp_descr_get */
3340 0, /* tp_descr_set */
3341 0, /* tp_dictoffset */
3342 0, /* tp_init */
3343 0, /* tp_alloc */
3344 basestring_new, /* tp_new */
3345 0, /* tp_free */
3346};
3347
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003348PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003349"str(object) -> string\n\
3350\n\
3351Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003352If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003353
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003354PyTypeObject PyString_Type = {
3355 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003356 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003357 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003358 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003359 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003360 (destructor)string_dealloc, /* tp_dealloc */
3361 (printfunc)string_print, /* tp_print */
3362 0, /* tp_getattr */
3363 0, /* tp_setattr */
3364 0, /* tp_compare */
3365 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003366 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003367 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003368 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003369 (hashfunc)string_hash, /* tp_hash */
3370 0, /* tp_call */
3371 (reprfunc)string_str, /* tp_str */
3372 PyObject_GenericGetAttr, /* tp_getattro */
3373 0, /* tp_setattro */
3374 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003375 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3376 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003377 string_doc, /* tp_doc */
3378 0, /* tp_traverse */
3379 0, /* tp_clear */
3380 (richcmpfunc)string_richcompare, /* tp_richcompare */
3381 0, /* tp_weaklistoffset */
3382 0, /* tp_iter */
3383 0, /* tp_iternext */
3384 string_methods, /* tp_methods */
3385 0, /* tp_members */
3386 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003387 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003388 0, /* tp_dict */
3389 0, /* tp_descr_get */
3390 0, /* tp_descr_set */
3391 0, /* tp_dictoffset */
3392 0, /* tp_init */
3393 0, /* tp_alloc */
3394 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003395 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003396};
3397
3398void
Fred Drakeba096332000-07-09 07:04:36 +00003399PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003400{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003401 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003402 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003403 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003404 if (w == NULL || !PyString_Check(*pv)) {
3405 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003406 *pv = NULL;
3407 return;
3408 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003409 v = string_concat((PyStringObject *) *pv, w);
3410 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003411 *pv = v;
3412}
3413
Guido van Rossum013142a1994-08-30 08:19:36 +00003414void
Fred Drakeba096332000-07-09 07:04:36 +00003415PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003416{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003417 PyString_Concat(pv, w);
3418 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003419}
3420
3421
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003422/* The following function breaks the notion that strings are immutable:
3423 it changes the size of a string. We get away with this only if there
3424 is only one module referencing the object. You can also think of it
3425 as creating a new string object and destroying the old one, only
3426 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003427 already be known to some other part of the code...
3428 Note that if there's not enough memory to resize the string, the original
3429 string object at *pv is deallocated, *pv is set to NULL, an "out of
3430 memory" exception is set, and -1 is returned. Else (on success) 0 is
3431 returned, and the value in *pv may or may not be the same as on input.
3432 As always, an extra byte is allocated for a trailing \0 byte (newsize
3433 does *not* include that), and a trailing \0 byte is stored.
3434*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003435
3436int
Fred Drakeba096332000-07-09 07:04:36 +00003437_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003438{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003439 register PyObject *v;
3440 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003441 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003442 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003443 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003444 Py_DECREF(v);
3445 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003446 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003447 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003448 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003449 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003450 _Py_ForgetReference(v);
3451 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003452 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003453 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003454 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003455 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003456 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003457 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003458 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003459 _Py_NewReference(*pv);
3460 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003461 sv->ob_size = newsize;
3462 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003463 return 0;
3464}
Guido van Rossume5372401993-03-16 12:15:04 +00003465
3466/* Helpers for formatstring */
3467
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003468static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003469getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003470{
3471 int argidx = *p_argidx;
3472 if (argidx < arglen) {
3473 (*p_argidx)++;
3474 if (arglen < 0)
3475 return args;
3476 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003477 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003478 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003479 PyErr_SetString(PyExc_TypeError,
3480 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003481 return NULL;
3482}
3483
/* Bit flags recording which format-specifier flag characters were seen. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */

3496
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003497static int
Fred Drakeba096332000-07-09 07:04:36 +00003498formatfloat(char *buf, size_t buflen, int flags,
3499 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003500{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003501 /* fmt = '%#.' + `prec` + `type`
3502 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003503 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003504 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003505 x = PyFloat_AsDouble(v);
3506 if (x == -1.0 && PyErr_Occurred()) {
3507 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003508 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003509 }
Guido van Rossume5372401993-03-16 12:15:04 +00003510 if (prec < 0)
3511 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003512 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3513 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003514 /* Worst case length calc to ensure no buffer overrun:
3515
3516 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003517 fmt = %#.<prec>g
3518 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003519 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003520 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003521
3522 'f' formats:
3523 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3524 len = 1 + 50 + 1 + prec = 52 + prec
3525
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003526 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003527 always given), therefore increase the length by one.
3528
3529 */
3530 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3531 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003532 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003533 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003534 return -1;
3535 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003536 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3537 (flags&F_ALT) ? "#" : "",
3538 prec, type);
Tim Peters885d4572001-11-28 20:27:42 +00003539 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003540 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003541}
3542
Tim Peters38fd5b62000-09-21 05:43:11 +00003543/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3544 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3545 * Python's regular ints.
3546 * Return value: a new PyString*, or NULL if error.
3547 * . *pbuf is set to point into it,
3548 * *plen set to the # of chars following that.
3549 * Caller must decref it when done using pbuf.
3550 * The string starting at *pbuf is of the form
3551 * "-"? ("0x" | "0X")? digit+
3552 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003553 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003554 * There will be at least prec digits, zero-filled on the left if
3555 * necessary to get that many.
3556 * val object to be converted
3557 * flags bitmask of format flags; only F_ALT is looked at
3558 * prec minimum number of digits; 0-fill on left if needed
3559 * type a character in [duoxX]; u acts the same as d
3560 *
3561 * CAUTION: o, x and X conversions on regular ints can never
3562 * produce a '-' sign, but can for Python's unbounded ints.
3563 */
3564PyObject*
3565_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3566 char **pbuf, int *plen)
3567{
3568 PyObject *result = NULL;
3569 char *buf;
3570 int i;
3571 int sign; /* 1 if '-', else 0 */
3572 int len; /* number of characters */
3573 int numdigits; /* len == numnondigits + numdigits */
3574 int numnondigits = 0;
3575
3576 switch (type) {
3577 case 'd':
3578 case 'u':
3579 result = val->ob_type->tp_str(val);
3580 break;
3581 case 'o':
3582 result = val->ob_type->tp_as_number->nb_oct(val);
3583 break;
3584 case 'x':
3585 case 'X':
3586 numnondigits = 2;
3587 result = val->ob_type->tp_as_number->nb_hex(val);
3588 break;
3589 default:
3590 assert(!"'type' not in [duoxX]");
3591 }
3592 if (!result)
3593 return NULL;
3594
3595 /* To modify the string in-place, there can only be one reference. */
3596 if (result->ob_refcnt != 1) {
3597 PyErr_BadInternalCall();
3598 return NULL;
3599 }
3600 buf = PyString_AsString(result);
3601 len = PyString_Size(result);
3602 if (buf[len-1] == 'L') {
3603 --len;
3604 buf[len] = '\0';
3605 }
3606 sign = buf[0] == '-';
3607 numnondigits += sign;
3608 numdigits = len - numnondigits;
3609 assert(numdigits > 0);
3610
Tim Petersfff53252001-04-12 18:38:48 +00003611 /* Get rid of base marker unless F_ALT */
3612 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003613 /* Need to skip 0x, 0X or 0. */
3614 int skipped = 0;
3615 switch (type) {
3616 case 'o':
3617 assert(buf[sign] == '0');
3618 /* If 0 is only digit, leave it alone. */
3619 if (numdigits > 1) {
3620 skipped = 1;
3621 --numdigits;
3622 }
3623 break;
3624 case 'x':
3625 case 'X':
3626 assert(buf[sign] == '0');
3627 assert(buf[sign + 1] == 'x');
3628 skipped = 2;
3629 numnondigits -= 2;
3630 break;
3631 }
3632 if (skipped) {
3633 buf += skipped;
3634 len -= skipped;
3635 if (sign)
3636 buf[0] = '-';
3637 }
3638 assert(len == numnondigits + numdigits);
3639 assert(numdigits > 0);
3640 }
3641
3642 /* Fill with leading zeroes to meet minimum width. */
3643 if (prec > numdigits) {
3644 PyObject *r1 = PyString_FromStringAndSize(NULL,
3645 numnondigits + prec);
3646 char *b1;
3647 if (!r1) {
3648 Py_DECREF(result);
3649 return NULL;
3650 }
3651 b1 = PyString_AS_STRING(r1);
3652 for (i = 0; i < numnondigits; ++i)
3653 *b1++ = *buf++;
3654 for (i = 0; i < prec - numdigits; i++)
3655 *b1++ = '0';
3656 for (i = 0; i < numdigits; i++)
3657 *b1++ = *buf++;
3658 *b1 = '\0';
3659 Py_DECREF(result);
3660 result = r1;
3661 buf = PyString_AS_STRING(result);
3662 len = numnondigits + prec;
3663 }
3664
3665 /* Fix up case for hex conversions. */
3666 switch (type) {
3667 case 'x':
3668 /* Need to convert all upper case letters to lower case. */
3669 for (i = 0; i < len; i++)
3670 if (buf[i] >= 'A' && buf[i] <= 'F')
3671 buf[i] += 'a'-'A';
3672 break;
3673 case 'X':
3674 /* Need to convert 0x to 0X (and -0x to -0X). */
3675 if (buf[sign + 1] == 'x')
3676 buf[sign + 1] = 'X';
3677 break;
3678 }
3679 *pbuf = buf;
3680 *plen = len;
3681 return result;
3682}
3683
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003684static int
Fred Drakeba096332000-07-09 07:04:36 +00003685formatint(char *buf, size_t buflen, int flags,
3686 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003687{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003688 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003689 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3690 + 1 + 1 = 24 */
3691 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003692 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003693 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003694
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003695 x = PyInt_AsLong(v);
3696 if (x == -1 && PyErr_Occurred()) {
3697 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003698 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003699 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003700 if (x < 0 && type == 'u') {
3701 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003702 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003703 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3704 sign = "-";
3705 else
3706 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003707 if (prec < 0)
3708 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003709
3710 if ((flags & F_ALT) &&
3711 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003712 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003713 * of issues that cause pain:
3714 * - when 0 is being converted, the C standard leaves off
3715 * the '0x' or '0X', which is inconsistent with other
3716 * %#x/%#X conversions and inconsistent with Python's
3717 * hex() function
3718 * - there are platforms that violate the standard and
3719 * convert 0 with the '0x' or '0X'
3720 * (Metrowerks, Compaq Tru64)
3721 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003722 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003723 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003724 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003725 * We can achieve the desired consistency by inserting our
3726 * own '0x' or '0X' prefix, and substituting %x/%X in place
3727 * of %#x/%#X.
3728 *
3729 * Note that this is the same approach as used in
3730 * formatint() in unicodeobject.c
3731 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003732 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3733 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003734 }
3735 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003736 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3737 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003738 prec, type);
3739 }
3740
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003741 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3742 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003743 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003744 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003745 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003746 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003747 return -1;
3748 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003749 if (sign[0])
3750 PyOS_snprintf(buf, buflen, fmt, -x);
3751 else
3752 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003753 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003754}
3755
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003756static int
Fred Drakeba096332000-07-09 07:04:36 +00003757formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003758{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003759 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003760 if (PyString_Check(v)) {
3761 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003762 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003763 }
3764 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003765 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003766 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003767 }
3768 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003769 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003770}
3771
Guido van Rossum013142a1994-08-30 08:19:36 +00003772
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  The numeric
   formatting routines do bounds checking to ensure no overflow
   (formatchar() merely presumes room for two bytes), but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast cannot combine
   with neighbouring tokens (e.g. `sizeof FORMATBUFLEN').
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003782
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003783PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003784PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003785{
3786 char *fmt, *res;
3787 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003788 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003789 PyObject *result, *orig_args;
3790#ifdef Py_USING_UNICODE
3791 PyObject *v, *w;
3792#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003793 PyObject *dict = NULL;
3794 if (format == NULL || !PyString_Check(format) || args == NULL) {
3795 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003796 return NULL;
3797 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003798 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003799 fmt = PyString_AS_STRING(format);
3800 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003801 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003802 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003803 if (result == NULL)
3804 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003805 res = PyString_AsString(result);
3806 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003807 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003808 argidx = 0;
3809 }
3810 else {
3811 arglen = -1;
3812 argidx = -2;
3813 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003814 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3815 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003816 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003817 while (--fmtcnt >= 0) {
3818 if (*fmt != '%') {
3819 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003820 rescnt = fmtcnt + 100;
3821 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003822 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003823 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003824 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003825 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003826 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003827 }
3828 *res++ = *fmt++;
3829 }
3830 else {
3831 /* Got a format specifier */
3832 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003833 int width = -1;
3834 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003835 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003836 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003837 PyObject *v = NULL;
3838 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003839 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003840 int sign;
3841 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003842 char formatbuf[FORMATBUFLEN];
3843 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003844#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003845 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003846 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003847#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003848
Guido van Rossumda9c2711996-12-05 21:58:58 +00003849 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003850 if (*fmt == '(') {
3851 char *keystart;
3852 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003853 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003854 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003855
3856 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003857 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003858 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003859 goto error;
3860 }
3861 ++fmt;
3862 --fmtcnt;
3863 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003864 /* Skip over balanced parentheses */
3865 while (pcount > 0 && --fmtcnt >= 0) {
3866 if (*fmt == ')')
3867 --pcount;
3868 else if (*fmt == '(')
3869 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003870 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003871 }
3872 keylen = fmt - keystart - 1;
3873 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003874 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003875 "incomplete format key");
3876 goto error;
3877 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003878 key = PyString_FromStringAndSize(keystart,
3879 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003880 if (key == NULL)
3881 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003882 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003883 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003884 args_owned = 0;
3885 }
3886 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003887 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003888 if (args == NULL) {
3889 goto error;
3890 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003891 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003892 arglen = -1;
3893 argidx = -2;
3894 }
Guido van Rossume5372401993-03-16 12:15:04 +00003895 while (--fmtcnt >= 0) {
3896 switch (c = *fmt++) {
3897 case '-': flags |= F_LJUST; continue;
3898 case '+': flags |= F_SIGN; continue;
3899 case ' ': flags |= F_BLANK; continue;
3900 case '#': flags |= F_ALT; continue;
3901 case '0': flags |= F_ZERO; continue;
3902 }
3903 break;
3904 }
3905 if (c == '*') {
3906 v = getnextarg(args, arglen, &argidx);
3907 if (v == NULL)
3908 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003909 if (!PyInt_Check(v)) {
3910 PyErr_SetString(PyExc_TypeError,
3911 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003912 goto error;
3913 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003914 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003915 if (width < 0) {
3916 flags |= F_LJUST;
3917 width = -width;
3918 }
Guido van Rossume5372401993-03-16 12:15:04 +00003919 if (--fmtcnt >= 0)
3920 c = *fmt++;
3921 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003922 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003923 width = c - '0';
3924 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003925 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003926 if (!isdigit(c))
3927 break;
3928 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003929 PyErr_SetString(
3930 PyExc_ValueError,
3931 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003932 goto error;
3933 }
3934 width = width*10 + (c - '0');
3935 }
3936 }
3937 if (c == '.') {
3938 prec = 0;
3939 if (--fmtcnt >= 0)
3940 c = *fmt++;
3941 if (c == '*') {
3942 v = getnextarg(args, arglen, &argidx);
3943 if (v == NULL)
3944 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003945 if (!PyInt_Check(v)) {
3946 PyErr_SetString(
3947 PyExc_TypeError,
3948 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003949 goto error;
3950 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003951 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003952 if (prec < 0)
3953 prec = 0;
3954 if (--fmtcnt >= 0)
3955 c = *fmt++;
3956 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003957 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003958 prec = c - '0';
3959 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003960 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003961 if (!isdigit(c))
3962 break;
3963 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003964 PyErr_SetString(
3965 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003966 "prec too big");
3967 goto error;
3968 }
3969 prec = prec*10 + (c - '0');
3970 }
3971 }
3972 } /* prec */
3973 if (fmtcnt >= 0) {
3974 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003975 if (--fmtcnt >= 0)
3976 c = *fmt++;
3977 }
3978 }
3979 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003980 PyErr_SetString(PyExc_ValueError,
3981 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003982 goto error;
3983 }
3984 if (c != '%') {
3985 v = getnextarg(args, arglen, &argidx);
3986 if (v == NULL)
3987 goto error;
3988 }
3989 sign = 0;
3990 fill = ' ';
3991 switch (c) {
3992 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003993 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003994 len = 1;
3995 break;
3996 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003997#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003998 if (PyUnicode_Check(v)) {
3999 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004000 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004001 goto unicode;
4002 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004003#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004004 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004005 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004006 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004007 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004008 else
4009 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004010 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004011 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004012 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004013 /* XXX Note: this should never happen,
4014 since PyObject_Repr() and
4015 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004016 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004017 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004018 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004019 goto error;
4020 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004021 pbuf = PyString_AS_STRING(temp);
4022 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004023 if (prec >= 0 && len > prec)
4024 len = prec;
4025 break;
4026 case 'i':
4027 case 'd':
4028 case 'u':
4029 case 'o':
4030 case 'x':
4031 case 'X':
4032 if (c == 'i')
4033 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004034 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004035 temp = _PyString_FormatLong(v, flags,
4036 prec, c, &pbuf, &len);
4037 if (!temp)
4038 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004039 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004040 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004041 else {
4042 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004043 len = formatint(pbuf,
4044 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004045 flags, prec, c, v);
4046 if (len < 0)
4047 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004048 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004049 }
4050 if (flags & F_ZERO)
4051 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004052 break;
4053 case 'e':
4054 case 'E':
4055 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004056 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004057 case 'g':
4058 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004059 if (c == 'F')
4060 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004061 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004062 len = formatfloat(pbuf, sizeof(formatbuf),
4063 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004064 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004065 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004066 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004067 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004068 fill = '0';
4069 break;
4070 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004071#ifdef Py_USING_UNICODE
4072 if (PyUnicode_Check(v)) {
4073 fmt = fmt_start;
4074 argidx = argidx_start;
4075 goto unicode;
4076 }
4077#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004078 pbuf = formatbuf;
4079 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004080 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004081 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004082 break;
4083 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004084 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004085 "unsupported format character '%c' (0x%x) "
4086 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004087 c, c,
4088 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004089 goto error;
4090 }
4091 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004092 if (*pbuf == '-' || *pbuf == '+') {
4093 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004094 len--;
4095 }
4096 else if (flags & F_SIGN)
4097 sign = '+';
4098 else if (flags & F_BLANK)
4099 sign = ' ';
4100 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004101 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004102 }
4103 if (width < len)
4104 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004105 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004106 reslen -= rescnt;
4107 rescnt = width + fmtcnt + 100;
4108 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004109 if (reslen < 0) {
4110 Py_DECREF(result);
4111 return PyErr_NoMemory();
4112 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004113 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004114 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004115 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004116 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004117 }
4118 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004119 if (fill != ' ')
4120 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004121 rescnt--;
4122 if (width > len)
4123 width--;
4124 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004125 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4126 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004127 assert(pbuf[1] == c);
4128 if (fill != ' ') {
4129 *res++ = *pbuf++;
4130 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004131 }
Tim Petersfff53252001-04-12 18:38:48 +00004132 rescnt -= 2;
4133 width -= 2;
4134 if (width < 0)
4135 width = 0;
4136 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004137 }
4138 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004139 do {
4140 --rescnt;
4141 *res++ = fill;
4142 } while (--width > len);
4143 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004144 if (fill == ' ') {
4145 if (sign)
4146 *res++ = sign;
4147 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004148 (c == 'x' || c == 'X')) {
4149 assert(pbuf[0] == '0');
4150 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004151 *res++ = *pbuf++;
4152 *res++ = *pbuf++;
4153 }
4154 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004155 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004156 res += len;
4157 rescnt -= len;
4158 while (--width >= len) {
4159 --rescnt;
4160 *res++ = ' ';
4161 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004162 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004163 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004164 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004165 goto error;
4166 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004167 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004168 } /* '%' */
4169 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004170 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004171 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004172 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004173 goto error;
4174 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004175 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004176 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004177 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004178 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004179 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004180
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004181#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004182 unicode:
4183 if (args_owned) {
4184 Py_DECREF(args);
4185 args_owned = 0;
4186 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004187 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004188 if (PyTuple_Check(orig_args) && argidx > 0) {
4189 PyObject *v;
4190 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4191 v = PyTuple_New(n);
4192 if (v == NULL)
4193 goto error;
4194 while (--n >= 0) {
4195 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4196 Py_INCREF(w);
4197 PyTuple_SET_ITEM(v, n, w);
4198 }
4199 args = v;
4200 } else {
4201 Py_INCREF(orig_args);
4202 args = orig_args;
4203 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004204 args_owned = 1;
4205 /* Take what we have of the result and let the Unicode formatting
4206 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004207 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004208 if (_PyString_Resize(&result, rescnt))
4209 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004210 fmtcnt = PyString_GET_SIZE(format) - \
4211 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004212 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4213 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004214 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004215 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004216 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004217 if (v == NULL)
4218 goto error;
4219 /* Paste what we have (result) to what the Unicode formatting
4220 function returned (v) and return the result (or error) */
4221 w = PyUnicode_Concat(result, v);
4222 Py_DECREF(result);
4223 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004224 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004225 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004226#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004227
Guido van Rossume5372401993-03-16 12:15:04 +00004228 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004229 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004230 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004231 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004232 }
Guido van Rossume5372401993-03-16 12:15:04 +00004233 return NULL;
4234}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004235
Guido van Rossum2a61e741997-01-18 07:55:05 +00004236void
Fred Drakeba096332000-07-09 07:04:36 +00004237PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004238{
4239 register PyStringObject *s = (PyStringObject *)(*p);
4240 PyObject *t;
4241 if (s == NULL || !PyString_Check(s))
4242 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004243 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004244 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004245 if (interned == NULL) {
4246 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004247 if (interned == NULL) {
4248 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004249 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004250 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004251 }
4252 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4253 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004254 Py_DECREF(*p);
4255 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004256 return;
4257 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004258 /* Ensure that only true string objects appear in the intern dict */
4259 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004260 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4261 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004262 if (t == NULL) {
4263 PyErr_Clear();
4264 return;
Tim Peters111f6092001-09-12 07:54:51 +00004265 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004266 } else {
4267 t = (PyObject*) s;
4268 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004269 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004270
4271 if (PyDict_SetItem(interned, t, t) == 0) {
4272 /* The two references in interned are not counted by
4273 refcnt. The string deallocator will take care of this */
4274 ((PyObject *)t)->ob_refcnt-=2;
4275 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4276 Py_DECREF(*p);
4277 *p = t;
4278 return;
4279 }
4280 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004281 PyErr_Clear();
4282}
4283
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004284void
4285PyString_InternImmortal(PyObject **p)
4286{
4287 PyString_InternInPlace(p);
4288 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4289 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4290 Py_INCREF(*p);
4291 }
4292}
4293
Guido van Rossum2a61e741997-01-18 07:55:05 +00004294
4295PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004296PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004297{
4298 PyObject *s = PyString_FromString(cp);
4299 if (s == NULL)
4300 return NULL;
4301 PyString_InternInPlace(&s);
4302 return s;
4303}
4304
Guido van Rossum8cf04761997-08-02 02:57:45 +00004305void
Fred Drakeba096332000-07-09 07:04:36 +00004306PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004307{
4308 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004309 for (i = 0; i < UCHAR_MAX + 1; i++) {
4310 Py_XDECREF(characters[i]);
4311 characters[i] = NULL;
4312 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004313 Py_XDECREF(nullstring);
4314 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004315}
Barry Warsawa903ad982001-02-23 16:40:48 +00004316
Barry Warsawa903ad982001-02-23 16:40:48 +00004317void _Py_ReleaseInternedStrings(void)
4318{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004319 PyObject *keys;
4320 PyStringObject *s;
4321 int i, n;
4322
4323 if (interned == NULL || !PyDict_Check(interned))
4324 return;
4325 keys = PyDict_Keys(interned);
4326 if (keys == NULL || !PyList_Check(keys)) {
4327 PyErr_Clear();
4328 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004329 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004330
4331 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4332 detector, interned strings are not forcibly deallocated; rather, we
4333 give them their stolen references back, and then clear and DECREF
4334 the interned dict. */
4335
4336 fprintf(stderr, "releasing interned strings\n");
4337 n = PyList_GET_SIZE(keys);
4338 for (i = 0; i < n; i++) {
4339 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4340 switch (s->ob_sstate) {
4341 case SSTATE_NOT_INTERNED:
4342 /* XXX Shouldn't happen */
4343 break;
4344 case SSTATE_INTERNED_IMMORTAL:
4345 s->ob_refcnt += 1;
4346 break;
4347 case SSTATE_INTERNED_MORTAL:
4348 s->ob_refcnt += 2;
4349 break;
4350 default:
4351 Py_FatalError("Inconsistent interned string state.");
4352 }
4353 s->ob_sstate = SSTATE_NOT_INTERNED;
4354 }
4355 Py_DECREF(keys);
4356 PyDict_Clear(interned);
4357 Py_DECREF(interned);
4358 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004359}