blob: 6a9450a21d97cfbc905850de02a904eb98aeabbc [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is that to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
28
Martin v. Löwisbab95592002-10-07 18:26:16 +000029#if defined(HAVE_MBTOWC) && defined(HAVE_WCHAR_H) && defined(HAVE_WCTYPE_H)
Martin v. Löwisfed24052002-10-07 13:55:50 +000030# define PRINT_MULTIBYTE_STRING
31# include <locale.h>
32# include <wchar.h>
Martin v. Löwisbab95592002-10-07 18:26:16 +000033# include <wctype.h>
Martin v. Löwisfed24052002-10-07 13:55:50 +000034# if defined(HAVE_ISWPRINT)
35# define _isprint iswprint
36# else
37# define _isprint isprint
38# endif
39#endif
40
41static const char *hexchars = "0123456789abcdef";
42
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000070PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000071{
Tim Peters9e897f42001-05-09 07:37:07 +000072 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000073 if (size == 0 && (op = nullstring) != NULL) {
74#ifdef COUNT_ALLOCS
75 null_strings++;
76#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 Py_INCREF(op);
78 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 if (size == 1 && str != NULL &&
81 (op = characters[*str & UCHAR_MAX]) != NULL)
82 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083#ifdef COUNT_ALLOCS
84 one_strings++;
85#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
87 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000089
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000090 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000091 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000092 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000093 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000094 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000095 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000097 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000098 if (str != NULL)
99 memcpy(op->ob_sval, str, size);
100 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +0000101 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000103 PyObject *t = (PyObject *)op;
104 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000105 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000108 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000109 PyObject *t = (PyObject *)op;
110 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000111 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000113 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000115 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000116}
117
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000119PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000120{
Tim Peters62de65b2001-12-06 20:29:32 +0000121 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000122 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000123
124 assert(str != NULL);
125 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000126 if (size > INT_MAX) {
127 PyErr_SetString(PyExc_OverflowError,
128 "string is too long for a Python string");
129 return NULL;
130 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0 && (op = nullstring) != NULL) {
132#ifdef COUNT_ALLOCS
133 null_strings++;
134#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000135 Py_INCREF(op);
136 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 }
138 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
139#ifdef COUNT_ALLOCS
140 one_strings++;
141#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
143 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000145
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000146 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000148 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000149 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000151 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000153 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000154 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000155 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000156 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000157 PyObject *t = (PyObject *)op;
158 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000159 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000160 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000161 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000162 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000163 PyObject *t = (PyObject *)op;
164 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000165 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000166 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000167 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000168 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000169 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000170}
171
Barry Warsawdadace02001-08-24 18:32:06 +0000172PyObject *
173PyString_FromFormatV(const char *format, va_list vargs)
174{
Tim Petersc15c4f12001-10-02 21:32:07 +0000175 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000176 int n = 0;
177 const char* f;
178 char *s;
179 PyObject* string;
180
Tim Petersc15c4f12001-10-02 21:32:07 +0000181#ifdef VA_LIST_IS_ARRAY
182 memcpy(count, vargs, sizeof(va_list));
183#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000184#ifdef __va_copy
185 __va_copy(count, vargs);
186#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000187 count = vargs;
188#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000189#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000190 /* step 1: figure out how large a buffer we need */
191 for (f = format; *f; f++) {
192 if (*f == '%') {
193 const char* p = f;
194 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
195 ;
196
197 /* skip the 'l' in %ld, since it doesn't change the
198 width. although only %d is supported (see
199 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000200 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000201 if (*f == 'l' && *(f+1) == 'd')
202 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000203
Barry Warsawdadace02001-08-24 18:32:06 +0000204 switch (*f) {
205 case 'c':
206 (void)va_arg(count, int);
207 /* fall through... */
208 case '%':
209 n++;
210 break;
211 case 'd': case 'i': case 'x':
212 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 /* 20 bytes is enough to hold a 64-bit
214 integer. Decimal takes the most space.
215 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000216 n += 20;
217 break;
218 case 's':
219 s = va_arg(count, char*);
220 n += strlen(s);
221 break;
222 case 'p':
223 (void) va_arg(count, int);
224 /* maximum 64-bit pointer representation:
225 * 0xffffffffffffffff
226 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000227 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000228 */
229 n += 19;
230 break;
231 default:
232 /* if we stumble upon an unknown
233 formatting code, copy the rest of
234 the format string to the output
235 string. (we cannot just skip the
236 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000238 n += strlen(p);
239 goto expand;
240 }
241 } else
242 n++;
243 }
244 expand:
245 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000246 /* Since we've analyzed how much space we need for the worst case,
247 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000248 string = PyString_FromStringAndSize(NULL, n);
249 if (!string)
250 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000251
Barry Warsawdadace02001-08-24 18:32:06 +0000252 s = PyString_AsString(string);
253
254 for (f = format; *f; f++) {
255 if (*f == '%') {
256 const char* p = f++;
257 int i, longflag = 0;
258 /* parse the width.precision part (we're only
259 interested in the precision value, if any) */
260 n = 0;
261 while (isdigit(Py_CHARMASK(*f)))
262 n = (n*10) + *f++ - '0';
263 if (*f == '.') {
264 f++;
265 n = 0;
266 while (isdigit(Py_CHARMASK(*f)))
267 n = (n*10) + *f++ - '0';
268 }
269 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
270 f++;
271 /* handle the long flag, but only for %ld. others
272 can be added when necessary. */
273 if (*f == 'l' && *(f+1) == 'd') {
274 longflag = 1;
275 ++f;
276 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000277
Barry Warsawdadace02001-08-24 18:32:06 +0000278 switch (*f) {
279 case 'c':
280 *s++ = va_arg(vargs, int);
281 break;
282 case 'd':
283 if (longflag)
284 sprintf(s, "%ld", va_arg(vargs, long));
285 else
286 sprintf(s, "%d", va_arg(vargs, int));
287 s += strlen(s);
288 break;
289 case 'i':
290 sprintf(s, "%i", va_arg(vargs, int));
291 s += strlen(s);
292 break;
293 case 'x':
294 sprintf(s, "%x", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 's':
298 p = va_arg(vargs, char*);
299 i = strlen(p);
300 if (n > 0 && i > n)
301 i = n;
302 memcpy(s, p, i);
303 s += i;
304 break;
305 case 'p':
306 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000307 /* %p is ill-defined: ensure leading 0x. */
308 if (s[1] == 'X')
309 s[1] = 'x';
310 else if (s[1] != 'x') {
311 memmove(s+2, s, strlen(s)+1);
312 s[0] = '0';
313 s[1] = 'x';
314 }
Barry Warsawdadace02001-08-24 18:32:06 +0000315 s += strlen(s);
316 break;
317 case '%':
318 *s++ = '%';
319 break;
320 default:
321 strcpy(s, p);
322 s += strlen(s);
323 goto end;
324 }
325 } else
326 *s++ = *f;
327 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000328
Barry Warsawdadace02001-08-24 18:32:06 +0000329 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000330 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000331 return string;
332}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000333
Barry Warsawdadace02001-08-24 18:32:06 +0000334PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000335PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000336{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000337 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000338 va_list vargs;
339
340#ifdef HAVE_STDARG_PROTOTYPES
341 va_start(vargs, format);
342#else
343 va_start(vargs);
344#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000345 ret = PyString_FromFormatV(format, vargs);
346 va_end(vargs);
347 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000348}
349
350
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000351PyObject *PyString_Decode(const char *s,
352 int size,
353 const char *encoding,
354 const char *errors)
355{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000356 PyObject *v, *str;
357
358 str = PyString_FromStringAndSize(s, size);
359 if (str == NULL)
360 return NULL;
361 v = PyString_AsDecodedString(str, encoding, errors);
362 Py_DECREF(str);
363 return v;
364}
365
366PyObject *PyString_AsDecodedObject(PyObject *str,
367 const char *encoding,
368 const char *errors)
369{
370 PyObject *v;
371
372 if (!PyString_Check(str)) {
373 PyErr_BadArgument();
374 goto onError;
375 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000376
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000377 if (encoding == NULL) {
378#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000380#else
381 PyErr_SetString(PyExc_ValueError, "no encoding specified");
382 goto onError;
383#endif
384 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000385
386 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000387 v = PyCodec_Decode(str, encoding, errors);
388 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000390
391 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000392
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 return NULL;
395}
396
397PyObject *PyString_AsDecodedString(PyObject *str,
398 const char *encoding,
399 const char *errors)
400{
401 PyObject *v;
402
403 v = PyString_AsDecodedObject(str, encoding, errors);
404 if (v == NULL)
405 goto onError;
406
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000407#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000408 /* Convert Unicode to a string using the default encoding */
409 if (PyUnicode_Check(v)) {
410 PyObject *temp = v;
411 v = PyUnicode_AsEncodedString(v, NULL, NULL);
412 Py_DECREF(temp);
413 if (v == NULL)
414 goto onError;
415 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000416#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417 if (!PyString_Check(v)) {
418 PyErr_Format(PyExc_TypeError,
419 "decoder did not return a string object (type=%.400s)",
420 v->ob_type->tp_name);
421 Py_DECREF(v);
422 goto onError;
423 }
424
425 return v;
426
427 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000428 return NULL;
429}
430
431PyObject *PyString_Encode(const char *s,
432 int size,
433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 str = PyString_FromStringAndSize(s, size);
439 if (str == NULL)
440 return NULL;
441 v = PyString_AsEncodedString(str, encoding, errors);
442 Py_DECREF(str);
443 return v;
444}
445
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000446PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000447 const char *encoding,
448 const char *errors)
449{
450 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000451
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000452 if (!PyString_Check(str)) {
453 PyErr_BadArgument();
454 goto onError;
455 }
456
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000457 if (encoding == NULL) {
458#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000460#else
461 PyErr_SetString(PyExc_ValueError, "no encoding specified");
462 goto onError;
463#endif
464 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000465
466 /* Encode via the codec registry */
467 v = PyCodec_Encode(str, encoding, errors);
468 if (v == NULL)
469 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470
471 return v;
472
473 onError:
474 return NULL;
475}
476
477PyObject *PyString_AsEncodedString(PyObject *str,
478 const char *encoding,
479 const char *errors)
480{
481 PyObject *v;
482
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000483 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000484 if (v == NULL)
485 goto onError;
486
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000487#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000488 /* Convert Unicode to a string using the default encoding */
489 if (PyUnicode_Check(v)) {
490 PyObject *temp = v;
491 v = PyUnicode_AsEncodedString(v, NULL, NULL);
492 Py_DECREF(temp);
493 if (v == NULL)
494 goto onError;
495 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000496#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 if (!PyString_Check(v)) {
498 PyErr_Format(PyExc_TypeError,
499 "encoder did not return a string object (type=%.400s)",
500 v->ob_type->tp_name);
501 Py_DECREF(v);
502 goto onError;
503 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000504
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000506
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000507 onError:
508 return NULL;
509}
510
Guido van Rossum234f9421993-06-17 12:35:49 +0000511static void
Fred Drakeba096332000-07-09 07:04:36 +0000512string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000513{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000514 switch (PyString_CHECK_INTERNED(op)) {
515 case SSTATE_NOT_INTERNED:
516 break;
517
518 case SSTATE_INTERNED_MORTAL:
519 /* revive dead object temporarily for DelItem */
520 op->ob_refcnt = 3;
521 if (PyDict_DelItem(interned, op) != 0)
522 Py_FatalError(
523 "deletion of interned string failed");
524 break;
525
526 case SSTATE_INTERNED_IMMORTAL:
527 Py_FatalError("Immortal interned string died.");
528
529 default:
530 Py_FatalError("Inconsistent interned string state.");
531 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000532 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000533}
534
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000535/* Unescape a backslash-escaped string. If unicode is non-zero,
536 the string is a u-literal. If recode_encoding is non-zero,
537 the string is UTF-8 encoded and should be re-encoded in the
538 specified encoding. */
539
540PyObject *PyString_DecodeEscape(const char *s,
541 int len,
542 const char *errors,
543 int unicode,
544 const char *recode_encoding)
545{
546 int c;
547 char *p, *buf;
548 const char *end;
549 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000550 int newlen = recode_encoding ? 4*len:len;
551 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000552 if (v == NULL)
553 return NULL;
554 p = buf = PyString_AsString(v);
555 end = s + len;
556 while (s < end) {
557 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000558 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000559#ifdef Py_USING_UNICODE
560 if (recode_encoding && (*s & 0x80)) {
561 PyObject *u, *w;
562 char *r;
563 const char* t;
564 int rn;
565 t = s;
566 /* Decode non-ASCII bytes as UTF-8. */
567 while (t < end && (*t & 0x80)) t++;
568 u = PyUnicode_DecodeUTF8(s, t - s, errors);
569 if(!u) goto failed;
570
571 /* Recode them in target encoding. */
572 w = PyUnicode_AsEncodedString(
573 u, recode_encoding, errors);
574 Py_DECREF(u);
575 if (!w) goto failed;
576
577 /* Append bytes to output buffer. */
578 r = PyString_AsString(w);
579 rn = PyString_Size(w);
580 memcpy(p, r, rn);
581 p += rn;
582 Py_DECREF(w);
583 s = t;
584 } else {
585 *p++ = *s++;
586 }
587#else
588 *p++ = *s++;
589#endif
590 continue;
591 }
592 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000593 if (s==end) {
594 PyErr_SetString(PyExc_ValueError,
595 "Trailing \\ in string");
596 goto failed;
597 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000598 switch (*s++) {
599 /* XXX This assumes ASCII! */
600 case '\n': break;
601 case '\\': *p++ = '\\'; break;
602 case '\'': *p++ = '\''; break;
603 case '\"': *p++ = '\"'; break;
604 case 'b': *p++ = '\b'; break;
605 case 'f': *p++ = '\014'; break; /* FF */
606 case 't': *p++ = '\t'; break;
607 case 'n': *p++ = '\n'; break;
608 case 'r': *p++ = '\r'; break;
609 case 'v': *p++ = '\013'; break; /* VT */
610 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
611 case '0': case '1': case '2': case '3':
612 case '4': case '5': case '6': case '7':
613 c = s[-1] - '0';
614 if ('0' <= *s && *s <= '7') {
615 c = (c<<3) + *s++ - '0';
616 if ('0' <= *s && *s <= '7')
617 c = (c<<3) + *s++ - '0';
618 }
619 *p++ = c;
620 break;
621 case 'x':
622 if (isxdigit(Py_CHARMASK(s[0]))
623 && isxdigit(Py_CHARMASK(s[1]))) {
624 unsigned int x = 0;
625 c = Py_CHARMASK(*s);
626 s++;
627 if (isdigit(c))
628 x = c - '0';
629 else if (islower(c))
630 x = 10 + c - 'a';
631 else
632 x = 10 + c - 'A';
633 x = x << 4;
634 c = Py_CHARMASK(*s);
635 s++;
636 if (isdigit(c))
637 x += c - '0';
638 else if (islower(c))
639 x += 10 + c - 'a';
640 else
641 x += 10 + c - 'A';
642 *p++ = x;
643 break;
644 }
645 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000646 PyErr_SetString(PyExc_ValueError,
647 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000648 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000649 }
650 if (strcmp(errors, "replace") == 0) {
651 *p++ = '?';
652 } else if (strcmp(errors, "ignore") == 0)
653 /* do nothing */;
654 else {
655 PyErr_Format(PyExc_ValueError,
656 "decoding error; "
657 "unknown error handling code: %.400s",
658 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000659 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000660 }
661#ifndef Py_USING_UNICODE
662 case 'u':
663 case 'U':
664 case 'N':
665 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000666 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 "Unicode escapes not legal "
668 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000669 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000670 }
671#endif
672 default:
673 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000674 s--;
675 goto non_esc; /* an arbitry number of unescaped
676 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000679 if (p-buf < newlen)
680 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000681 return v;
682 failed:
683 Py_DECREF(v);
684 return NULL;
685}
686
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000687static int
688string_getsize(register PyObject *op)
689{
690 char *s;
691 int len;
692 if (PyString_AsStringAndSize(op, &s, &len))
693 return -1;
694 return len;
695}
696
697static /*const*/ char *
698string_getbuffer(register PyObject *op)
699{
700 char *s;
701 int len;
702 if (PyString_AsStringAndSize(op, &s, &len))
703 return NULL;
704 return s;
705}
706
Guido van Rossumd7047b31995-01-02 19:07:15 +0000707int
Fred Drakeba096332000-07-09 07:04:36 +0000708PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (!PyString_Check(op))
711 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000712 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000713}
714
715/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723int
724PyString_AsStringAndSize(register PyObject *obj,
725 register char **s,
726 register int *len)
727{
728 if (s == NULL) {
729 PyErr_BadInternalCall();
730 return -1;
731 }
732
733 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000734#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735 if (PyUnicode_Check(obj)) {
736 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
737 if (obj == NULL)
738 return -1;
739 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000740 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000741#endif
742 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 PyErr_Format(PyExc_TypeError,
744 "expected string or Unicode object, "
745 "%.200s found", obj->ob_type->tp_name);
746 return -1;
747 }
748 }
749
750 *s = PyString_AS_STRING(obj);
751 if (len != NULL)
752 *len = PyString_GET_SIZE(obj);
753 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
754 PyErr_SetString(PyExc_TypeError,
755 "expected string without null bytes");
756 return -1;
757 }
758 return 0;
759}
760
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000761/* Methods */
762
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000763static int
Fred Drakeba096332000-07-09 07:04:36 +0000764string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000765{
Martin v. Löwisfed24052002-10-07 13:55:50 +0000766#ifndef PRINT_MULTIBYTE_STRING
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000767 int i;
768 char c;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000769#else
770 char *scur, *send;
771 wchar_t c;
772 int cr;
773#endif
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000774 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000775
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000776 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000777 if (! PyString_CheckExact(op)) {
778 int ret;
779 /* A str subclass may have its own __str__ method. */
780 op = (PyStringObject *) PyObject_Str((PyObject *)op);
781 if (op == NULL)
782 return -1;
783 ret = string_print(op, fp, flags);
784 Py_DECREF(op);
785 return ret;
786 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000787 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000789 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791
Thomas Wouters7e474022000-07-16 12:04:32 +0000792 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000794 if (memchr(op->ob_sval, '\'', op->ob_size) &&
795 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000796 quote = '"';
797
798 fputc(quote, fp);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000799#ifndef PRINT_MULTIBYTE_STRING
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800 for (i = 0; i < op->ob_size; i++) {
801 c = op->ob_sval[i];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000802#else
803 for (scur = op->ob_sval, send = op->ob_sval + op->ob_size;
804 scur < send; scur += cr) {
805 if ((cr = mbtowc(&c, scur, send - scur)) <= 0)
806 goto non_printable;
807#endif
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000808 if (c == quote || c == '\\')
Martin v. Löwisfed24052002-10-07 13:55:50 +0000809 fputc('\\', fp), fputc(c, fp);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000810 else if (c == '\t')
Martin v. Löwisfed24052002-10-07 13:55:50 +0000811 fputs("\\t", fp);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000812 else if (c == '\n')
Martin v. Löwisfed24052002-10-07 13:55:50 +0000813 fputs("\\n", fp);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000814 else if (c == '\r')
Martin v. Löwisfed24052002-10-07 13:55:50 +0000815 fputs("\\r", fp);
816#ifndef PRINT_MULTIBYTE_STRING
817 else if (' ' <= c && c < 0x7f)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000818 fputc(c, fp);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000819 else
820 fprintf(fp, "\\x%02x", c & 0xff);
821#else
822 else if (_isprint(c))
823 fwrite(scur, cr, 1, fp);
824 else {
825non_printable: cr = 1; /* unit to move cursor */
826 fprintf(fp, "\\x%02x", *scur & 0xff);
827 }
828#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000831 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832}
833
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000834PyObject *
835PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000837 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000838 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
839 PyObject *v;
840 if (newsize > INT_MAX) {
841 PyErr_SetString(PyExc_OverflowError,
842 "string is too large to make repr");
843 }
844 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000846 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 }
848 else {
Martin v. Löwisfed24052002-10-07 13:55:50 +0000849#ifndef PRINT_MULTIBYTE_STRING
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 register int i;
851 register char c;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000852#else
853 register char *scur, *send;
854 wchar_t c;
855 int cr;
856#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000858 int quote;
859
Thomas Wouters7e474022000-07-16 12:04:32 +0000860 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000861 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000862 if (smartquotes &&
863 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000864 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000865 quote = '"';
866
Tim Peters9161c8b2001-12-03 01:55:38 +0000867 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000868 *p++ = quote;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000869#ifndef PRINT_MULTIBYTE_STRING
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000871 /* There's at least enough room for a hex escape
872 and a closing quote. */
873 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 c = op->ob_sval[i];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000875#else
876 for (scur = op->ob_sval, send = op->ob_sval + op->ob_size;
877 scur < send; scur += cr) {
878 if ((cr = mbtowc(&c, scur, send - scur)) <= 0)
879 goto non_printable;
880#endif
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000883 else if (c == '\t')
884 *p++ = '\\', *p++ = 't';
885 else if (c == '\n')
886 *p++ = '\\', *p++ = 'n';
887 else if (c == '\r')
888 *p++ = '\\', *p++ = 'r';
Martin v. Löwisfed24052002-10-07 13:55:50 +0000889#ifndef PRINT_MULTIBYTE_STRING
890 else if (' ' <= c && c < 0x7f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000891 *p++ = c;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000892 else {
893#else
894 else if (_isprint(c))
895 memcpy(p, scur, cr), p += cr;
896 else {
897non_printable: cr = 1; c = *scur;
898#endif
899 *p++ = '\\'; *p++ = 'x';
900 *p++ = hexchars[(c >> 4) & 0x0f];
901 *p++ = hexchars[c & 0x0f];
902 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000904 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000905 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000906 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000908 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000909 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000910 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911}
912
Guido van Rossum189f1df2001-05-01 16:51:53 +0000913static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000914string_repr(PyObject *op)
915{
916 return PyString_Repr(op, 1);
917}
918
919static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000920string_str(PyObject *s)
921{
Tim Petersc9933152001-10-16 20:18:24 +0000922 assert(PyString_Check(s));
923 if (PyString_CheckExact(s)) {
924 Py_INCREF(s);
925 return s;
926 }
927 else {
928 /* Subtype -- return genuine string with the same value. */
929 PyStringObject *t = (PyStringObject *) s;
930 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
931 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000932}
933
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934static int
Fred Drakeba096332000-07-09 07:04:36 +0000935string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936{
937 return a->ob_size;
938}
939
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000941string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942{
943 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944 register PyStringObject *op;
945 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000946#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000947 if (PyUnicode_Check(bb))
948 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000949#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000950 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000951 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000952 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000953 return NULL;
954 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000956 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000957 if ((a->ob_size == 0 || b->ob_size == 0) &&
958 PyString_CheckExact(a) && PyString_CheckExact(b)) {
959 if (a->ob_size == 0) {
960 Py_INCREF(bb);
961 return bb;
962 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000963 Py_INCREF(a);
964 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 }
966 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000967 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000968 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000969 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000970 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000972 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000973 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000974 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000975 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
976 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
977 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000978 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000979#undef b
980}
981
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000983string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984{
985 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000986 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000987 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000988 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989 if (n < 0)
990 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000991 /* watch out for overflows: the size can overflow int,
992 * and the # of bytes needed can overflow size_t
993 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000994 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000995 if (n && size / n != a->ob_size) {
996 PyErr_SetString(PyExc_OverflowError,
997 "repeated string is too long");
998 return NULL;
999 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001000 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001001 Py_INCREF(a);
1002 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 }
Tim Peters8f422462000-09-09 06:13:41 +00001004 nbytes = size * sizeof(char);
1005 if (nbytes / sizeof(char) != (size_t)size ||
1006 nbytes + sizeof(PyStringObject) <= nbytes) {
1007 PyErr_SetString(PyExc_OverflowError,
1008 "repeated string is too long");
1009 return NULL;
1010 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001011 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001012 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001013 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001014 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001015 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001016 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001017 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001018 for (i = 0; i < size; i += a->ob_size)
1019 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
1020 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001021 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001022}
1023
1024/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1025
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001026static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001027string_slice(register PyStringObject *a, register int i, register int j)
1028 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001029{
1030 if (i < 0)
1031 i = 0;
1032 if (j < 0)
1033 j = 0; /* Avoid signed/unsigned bug in next line */
1034 if (j > a->ob_size)
1035 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001036 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1037 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001038 Py_INCREF(a);
1039 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001040 }
1041 if (j < i)
1042 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001043 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001044}
1045
Guido van Rossum9284a572000-03-07 15:53:43 +00001046static int
Fred Drakeba096332000-07-09 07:04:36 +00001047string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001048{
Barry Warsaw817918c2002-08-06 16:58:21 +00001049 const char *lhs, *rhs, *end;
1050 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001051
1052 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001053#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001054 if (PyUnicode_Check(el))
1055 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001056#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001057 if (!PyString_Check(el)) {
1058 PyErr_SetString(PyExc_TypeError,
1059 "'in <string>' requires string as left operand");
1060 return -1;
1061 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001062 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001063 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001064 rhs = PyString_AS_STRING(el);
1065 lhs = PyString_AS_STRING(a);
1066
1067 /* optimize for a single character */
1068 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001069 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001070
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001071 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001072 while (lhs <= end) {
1073 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001074 return 1;
1075 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001076
Guido van Rossum9284a572000-03-07 15:53:43 +00001077 return 0;
1078}
1079
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001080static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001081string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001082{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001083 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001084 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001085 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001086 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001087 return NULL;
1088 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001089 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001090 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001091 if (v == NULL)
1092 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001093 else {
1094#ifdef COUNT_ALLOCS
1095 one_strings++;
1096#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001097 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001098 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001099 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001100}
1101
Martin v. Löwiscd353062001-05-24 16:56:35 +00001102static PyObject*
1103string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001104{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001105 int c;
1106 int len_a, len_b;
1107 int min_len;
1108 PyObject *result;
1109
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001110 /* Make sure both arguments are strings. */
1111 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001112 result = Py_NotImplemented;
1113 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001114 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001115 if (a == b) {
1116 switch (op) {
1117 case Py_EQ:case Py_LE:case Py_GE:
1118 result = Py_True;
1119 goto out;
1120 case Py_NE:case Py_LT:case Py_GT:
1121 result = Py_False;
1122 goto out;
1123 }
1124 }
1125 if (op == Py_EQ) {
1126 /* Supporting Py_NE here as well does not save
1127 much time, since Py_NE is rarely used. */
1128 if (a->ob_size == b->ob_size
1129 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001130 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001131 a->ob_size) == 0)) {
1132 result = Py_True;
1133 } else {
1134 result = Py_False;
1135 }
1136 goto out;
1137 }
1138 len_a = a->ob_size; len_b = b->ob_size;
1139 min_len = (len_a < len_b) ? len_a : len_b;
1140 if (min_len > 0) {
1141 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1142 if (c==0)
1143 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1144 }else
1145 c = 0;
1146 if (c == 0)
1147 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1148 switch (op) {
1149 case Py_LT: c = c < 0; break;
1150 case Py_LE: c = c <= 0; break;
1151 case Py_EQ: assert(0); break; /* unreachable */
1152 case Py_NE: c = c != 0; break;
1153 case Py_GT: c = c > 0; break;
1154 case Py_GE: c = c >= 0; break;
1155 default:
1156 result = Py_NotImplemented;
1157 goto out;
1158 }
1159 result = c ? Py_True : Py_False;
1160 out:
1161 Py_INCREF(result);
1162 return result;
1163}
1164
1165int
1166_PyString_Eq(PyObject *o1, PyObject *o2)
1167{
1168 PyStringObject *a, *b;
1169 a = (PyStringObject*)o1;
1170 b = (PyStringObject*)o2;
1171 return a->ob_size == b->ob_size
1172 && *a->ob_sval == *b->ob_sval
1173 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001174}
1175
Guido van Rossum9bfef441993-03-29 10:43:31 +00001176static long
Fred Drakeba096332000-07-09 07:04:36 +00001177string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001179 register int len;
1180 register unsigned char *p;
1181 register long x;
1182
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001183 if (a->ob_shash != -1)
1184 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001185 len = a->ob_size;
1186 p = (unsigned char *) a->ob_sval;
1187 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001188 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001189 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001190 x ^= a->ob_size;
1191 if (x == -1)
1192 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001194 return x;
1195}
1196
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001197static PyObject*
1198string_subscript(PyStringObject* self, PyObject* item)
1199{
1200 if (PyInt_Check(item)) {
1201 long i = PyInt_AS_LONG(item);
1202 if (i < 0)
1203 i += PyString_GET_SIZE(self);
1204 return string_item(self,i);
1205 }
1206 else if (PyLong_Check(item)) {
1207 long i = PyLong_AsLong(item);
1208 if (i == -1 && PyErr_Occurred())
1209 return NULL;
1210 if (i < 0)
1211 i += PyString_GET_SIZE(self);
1212 return string_item(self,i);
1213 }
1214 else if (PySlice_Check(item)) {
1215 int start, stop, step, slicelength, cur, i;
1216 char* source_buf;
1217 char* result_buf;
1218 PyObject* result;
1219
1220 if (PySlice_GetIndicesEx((PySliceObject*)item,
1221 PyString_GET_SIZE(self),
1222 &start, &stop, &step, &slicelength) < 0) {
1223 return NULL;
1224 }
1225
1226 if (slicelength <= 0) {
1227 return PyString_FromStringAndSize("", 0);
1228 }
1229 else {
1230 source_buf = PyString_AsString((PyObject*)self);
1231 result_buf = PyMem_Malloc(slicelength);
1232
1233 for (cur = start, i = 0; i < slicelength;
1234 cur += step, i++) {
1235 result_buf[i] = source_buf[cur];
1236 }
1237
1238 result = PyString_FromStringAndSize(result_buf,
1239 slicelength);
1240 PyMem_Free(result_buf);
1241 return result;
1242 }
1243 }
1244 else {
1245 PyErr_SetString(PyExc_TypeError,
1246 "string indices must be integers");
1247 return NULL;
1248 }
1249}
1250
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001251static int
Fred Drakeba096332000-07-09 07:04:36 +00001252string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001253{
1254 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001255 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001256 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257 return -1;
1258 }
1259 *ptr = (void *)self->ob_sval;
1260 return self->ob_size;
1261}
1262
1263static int
Fred Drakeba096332000-07-09 07:04:36 +00001264string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001265{
Guido van Rossum045e6881997-09-08 18:30:11 +00001266 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001267 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001268 return -1;
1269}
1270
1271static int
Fred Drakeba096332000-07-09 07:04:36 +00001272string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001273{
1274 if ( lenp )
1275 *lenp = self->ob_size;
1276 return 1;
1277}
1278
Guido van Rossum1db70701998-10-08 02:18:52 +00001279static int
Fred Drakeba096332000-07-09 07:04:36 +00001280string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001281{
1282 if ( index != 0 ) {
1283 PyErr_SetString(PyExc_SystemError,
1284 "accessing non-existent string segment");
1285 return -1;
1286 }
1287 *ptr = self->ob_sval;
1288 return self->ob_size;
1289}
1290
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001291static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001292 (inquiry)string_length, /*sq_length*/
1293 (binaryfunc)string_concat, /*sq_concat*/
1294 (intargfunc)string_repeat, /*sq_repeat*/
1295 (intargfunc)string_item, /*sq_item*/
1296 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001297 0, /*sq_ass_item*/
1298 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001299 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001300};
1301
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001302static PyMappingMethods string_as_mapping = {
1303 (inquiry)string_length,
1304 (binaryfunc)string_subscript,
1305 0,
1306};
1307
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001308static PyBufferProcs string_as_buffer = {
1309 (getreadbufferproc)string_buffer_getreadbuf,
1310 (getwritebufferproc)string_buffer_getwritebuf,
1311 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001312 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001313};
1314
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315
1316
/* Strip direction codes; they double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, indexed by the strip direction codes above. */
static const char *stripformat[] = {
	"|O:lstrip",
	"|O:rstrip",
	"|O:strip"
};

/* Method name for strip direction i: the format string with its
   first three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001325
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001326
1327static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001328split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001329{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001330 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001331 PyObject* item;
1332 PyObject *list = PyList_New(0);
1333
1334 if (list == NULL)
1335 return NULL;
1336
Guido van Rossum4c08d552000-03-10 22:55:18 +00001337 for (i = j = 0; i < len; ) {
1338 while (i < len && isspace(Py_CHARMASK(s[i])))
1339 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341 while (i < len && !isspace(Py_CHARMASK(s[i])))
1342 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001343 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 if (maxsplit-- <= 0)
1345 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1347 if (item == NULL)
1348 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 err = PyList_Append(list, item);
1350 Py_DECREF(item);
1351 if (err < 0)
1352 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353 while (i < len && isspace(Py_CHARMASK(s[i])))
1354 i++;
1355 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001356 }
1357 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 if (j < len) {
1359 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1360 if (item == NULL)
1361 goto finally;
1362 err = PyList_Append(list, item);
1363 Py_DECREF(item);
1364 if (err < 0)
1365 goto finally;
1366 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 return list;
1368 finally:
1369 Py_DECREF(list);
1370 return NULL;
1371}
1372
1373
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001374PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375"S.split([sep [,maxsplit]]) -> list of strings\n\
1376\n\
1377Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001378delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001379splits are done. If sep is not specified or is None, any\n\
1380whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001381
1382static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001383string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001384{
1385 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386 int maxsplit = -1;
1387 const char *s = PyString_AS_STRING(self), *sub;
1388 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001392 if (maxsplit < 0)
1393 maxsplit = INT_MAX;
1394 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001395 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001396 if (PyString_Check(subobj)) {
1397 sub = PyString_AS_STRING(subobj);
1398 n = PyString_GET_SIZE(subobj);
1399 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001400#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001401 else if (PyUnicode_Check(subobj))
1402 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001403#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001404 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1405 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406 if (n == 0) {
1407 PyErr_SetString(PyExc_ValueError, "empty separator");
1408 return NULL;
1409 }
1410
1411 list = PyList_New(0);
1412 if (list == NULL)
1413 return NULL;
1414
1415 i = j = 0;
1416 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001417 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001418 if (maxsplit-- <= 0)
1419 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1421 if (item == NULL)
1422 goto fail;
1423 err = PyList_Append(list, item);
1424 Py_DECREF(item);
1425 if (err < 0)
1426 goto fail;
1427 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428 }
1429 else
1430 i++;
1431 }
1432 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1433 if (item == NULL)
1434 goto fail;
1435 err = PyList_Append(list, item);
1436 Py_DECREF(item);
1437 if (err < 0)
1438 goto fail;
1439
1440 return list;
1441
1442 fail:
1443 Py_DECREF(list);
1444 return NULL;
1445}
1446
1447
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001448PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449"S.join(sequence) -> string\n\
1450\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001451Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001452sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001453
1454static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001455string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456{
1457 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001458 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 char *p;
1461 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001462 size_t sz = 0;
1463 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001464 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001465
Tim Peters19fe14e2001-01-19 03:03:47 +00001466 seq = PySequence_Fast(orig, "");
1467 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001468 if (PyErr_ExceptionMatches(PyExc_TypeError))
1469 PyErr_Format(PyExc_TypeError,
1470 "sequence expected, %.80s found",
1471 orig->ob_type->tp_name);
1472 return NULL;
1473 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001474
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001475 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001476 if (seqlen == 0) {
1477 Py_DECREF(seq);
1478 return PyString_FromString("");
1479 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001480 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001481 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001482 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1483 PyErr_Format(PyExc_TypeError,
1484 "sequence item 0: expected string,"
1485 " %.80s found",
1486 item->ob_type->tp_name);
1487 Py_DECREF(seq);
1488 return NULL;
1489 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001490 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001491 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001494
Tim Peters19fe14e2001-01-19 03:03:47 +00001495 /* There are at least two things to join. Do a pre-pass to figure out
1496 * the total amount of space we'll need (sz), see whether any argument
1497 * is absurd, and defer to the Unicode join if appropriate.
1498 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001499 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001500 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001501 item = PySequence_Fast_GET_ITEM(seq, i);
1502 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001503#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001504 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001505 /* Defer to Unicode join.
1506 * CAUTION: There's no gurantee that the
1507 * original sequence can be iterated over
1508 * again, so we must pass seq here.
1509 */
1510 PyObject *result;
1511 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001512 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001513 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001514 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001515#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001516 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001517 "sequence item %i: expected string,"
1518 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001519 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001520 Py_DECREF(seq);
1521 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001522 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001523 sz += PyString_GET_SIZE(item);
1524 if (i != 0)
1525 sz += seplen;
1526 if (sz < old_sz || sz > INT_MAX) {
1527 PyErr_SetString(PyExc_OverflowError,
1528 "join() is too long for a Python string");
1529 Py_DECREF(seq);
1530 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001532 }
1533
1534 /* Allocate result space. */
1535 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1536 if (res == NULL) {
1537 Py_DECREF(seq);
1538 return NULL;
1539 }
1540
1541 /* Catenate everything. */
1542 p = PyString_AS_STRING(res);
1543 for (i = 0; i < seqlen; ++i) {
1544 size_t n;
1545 item = PySequence_Fast_GET_ITEM(seq, i);
1546 n = PyString_GET_SIZE(item);
1547 memcpy(p, PyString_AS_STRING(item), n);
1548 p += n;
1549 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001550 memcpy(p, sep, seplen);
1551 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001552 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001554
Jeremy Hylton49048292000-07-11 03:28:17 +00001555 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001556 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001557}
1558
Tim Peters52e155e2001-06-16 05:42:57 +00001559PyObject *
1560_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001561{
Tim Petersa7259592001-06-16 05:11:17 +00001562 assert(sep != NULL && PyString_Check(sep));
1563 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001564 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001565}
1566
/* Normalize a (start, end) pair against a sequence of length len, in place.
   - end is capped at len; a negative end is taken relative to len and
     floored at 0.
   - a negative start is taken relative to len and floored at 0.
   start is NOT capped at len, so callers may still see start > end. */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int hi;
    int lo;

    hi = *end;
    if (hi > len)
        hi = len;
    else if (hi < 0)
        hi += len;
    *end = (hi < 0) ? 0 : hi;

    lo = *start;
    if (lo < 0)
        lo += len;
    *start = (lo < 0) ? 0 : lo;
}
1581
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582static long
Fred Drakeba096332000-07-09 07:04:36 +00001583string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001584{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001585 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586 int len = PyString_GET_SIZE(self);
1587 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001588 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001590 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001591 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001592 return -2;
1593 if (PyString_Check(subobj)) {
1594 sub = PyString_AS_STRING(subobj);
1595 n = PyString_GET_SIZE(subobj);
1596 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001597#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001598 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001599 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001600#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001601 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602 return -2;
1603
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001604 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001605
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606 if (dir > 0) {
1607 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001609 last -= n;
1610 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001611 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001612 return (long)i;
1613 }
1614 else {
1615 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001616
Guido van Rossum4c08d552000-03-10 22:55:18 +00001617 if (n == 0 && i <= last)
1618 return (long)last;
1619 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001620 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621 return (long)j;
1622 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001623
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624 return -1;
1625}
1626
1627
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001628PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629"S.find(sub [,start [,end]]) -> int\n\
1630\n\
1631Return the lowest index in S where substring sub is found,\n\
1632such that sub is contained within s[start,end]. Optional\n\
1633arguments start and end are interpreted as in slice notation.\n\
1634\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001635Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636
1637static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001638string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001640 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 if (result == -2)
1642 return NULL;
1643 return PyInt_FromLong(result);
1644}
1645
1646
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001647PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001648"S.index(sub [,start [,end]]) -> int\n\
1649\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001650Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651
1652static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001653string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001655 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656 if (result == -2)
1657 return NULL;
1658 if (result == -1) {
1659 PyErr_SetString(PyExc_ValueError,
1660 "substring not found in string.index");
1661 return NULL;
1662 }
1663 return PyInt_FromLong(result);
1664}
1665
1666
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001667PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668"S.rfind(sub [,start [,end]]) -> int\n\
1669\n\
1670Return the highest index in S where substring sub is found,\n\
1671such that sub is contained within s[start,end]. Optional\n\
1672arguments start and end are interpreted as in slice notation.\n\
1673\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001674Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675
1676static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001677string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001680 if (result == -2)
1681 return NULL;
1682 return PyInt_FromLong(result);
1683}
1684
1685
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001686PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687"S.rindex(sub [,start [,end]]) -> int\n\
1688\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001689Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690
1691static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001692string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001693{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001694 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 if (result == -2)
1696 return NULL;
1697 if (result == -1) {
1698 PyErr_SetString(PyExc_ValueError,
1699 "substring not found in string.rindex");
1700 return NULL;
1701 }
1702 return PyInt_FromLong(result);
1703}
1704
1705
1706static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001707do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1708{
1709 char *s = PyString_AS_STRING(self);
1710 int len = PyString_GET_SIZE(self);
1711 char *sep = PyString_AS_STRING(sepobj);
1712 int seplen = PyString_GET_SIZE(sepobj);
1713 int i, j;
1714
1715 i = 0;
1716 if (striptype != RIGHTSTRIP) {
1717 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1718 i++;
1719 }
1720 }
1721
1722 j = len;
1723 if (striptype != LEFTSTRIP) {
1724 do {
1725 j--;
1726 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1727 j++;
1728 }
1729
1730 if (i == 0 && j == len && PyString_CheckExact(self)) {
1731 Py_INCREF(self);
1732 return (PyObject*)self;
1733 }
1734 else
1735 return PyString_FromStringAndSize(s+i, j-i);
1736}
1737
1738
1739static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001740do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741{
1742 char *s = PyString_AS_STRING(self);
1743 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745 i = 0;
1746 if (striptype != RIGHTSTRIP) {
1747 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1748 i++;
1749 }
1750 }
1751
1752 j = len;
1753 if (striptype != LEFTSTRIP) {
1754 do {
1755 j--;
1756 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1757 j++;
1758 }
1759
Tim Peters8fa5dd02001-09-12 02:18:30 +00001760 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761 Py_INCREF(self);
1762 return (PyObject*)self;
1763 }
1764 else
1765 return PyString_FromStringAndSize(s+i, j-i);
1766}
1767
1768
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001769static PyObject *
1770do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1771{
1772 PyObject *sep = NULL;
1773
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001774 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001775 return NULL;
1776
1777 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001778 if (PyString_Check(sep))
1779 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001780#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001781 else if (PyUnicode_Check(sep)) {
1782 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1783 PyObject *res;
1784 if (uniself==NULL)
1785 return NULL;
1786 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1787 striptype, sep);
1788 Py_DECREF(uniself);
1789 return res;
1790 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001791#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001792 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001793 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001794#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001795 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001796#else
1797 "%s arg must be None or str",
1798#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001799 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001800 return NULL;
1801 }
1802 return do_xstrip(self, striptype, sep);
1803 }
1804
1805 return do_strip(self, striptype);
1806}
1807
1808
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001809PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001810"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811\n\
1812Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001813whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001814If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001815If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816
1817static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001818string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001820 if (PyTuple_GET_SIZE(args) == 0)
1821 return do_strip(self, BOTHSTRIP); /* Common case */
1822 else
1823 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824}
1825
1826
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001827PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001828"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001830Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001831If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001832If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001833
1834static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001835string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001837 if (PyTuple_GET_SIZE(args) == 0)
1838 return do_strip(self, LEFTSTRIP); /* Common case */
1839 else
1840 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841}
1842
1843
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001844PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001845"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001847Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001848If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001849If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850
1851static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001852string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001854 if (PyTuple_GET_SIZE(args) == 0)
1855 return do_strip(self, RIGHTSTRIP); /* Common case */
1856 else
1857 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858}
1859
1860
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001861PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001862"S.lower() -> string\n\
1863\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001864Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865
1866static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001867string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868{
1869 char *s = PyString_AS_STRING(self), *s_new;
1870 int i, n = PyString_GET_SIZE(self);
1871 PyObject *new;
1872
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001873 new = PyString_FromStringAndSize(NULL, n);
1874 if (new == NULL)
1875 return NULL;
1876 s_new = PyString_AsString(new);
1877 for (i = 0; i < n; i++) {
1878 int c = Py_CHARMASK(*s++);
1879 if (isupper(c)) {
1880 *s_new = tolower(c);
1881 } else
1882 *s_new = c;
1883 s_new++;
1884 }
1885 return new;
1886}
1887
1888
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001889PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890"S.upper() -> string\n\
1891\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001892Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893
1894static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001895string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896{
1897 char *s = PyString_AS_STRING(self), *s_new;
1898 int i, n = PyString_GET_SIZE(self);
1899 PyObject *new;
1900
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901 new = PyString_FromStringAndSize(NULL, n);
1902 if (new == NULL)
1903 return NULL;
1904 s_new = PyString_AsString(new);
1905 for (i = 0; i < n; i++) {
1906 int c = Py_CHARMASK(*s++);
1907 if (islower(c)) {
1908 *s_new = toupper(c);
1909 } else
1910 *s_new = c;
1911 s_new++;
1912 }
1913 return new;
1914}
1915
1916
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001917PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001918"S.title() -> string\n\
1919\n\
1920Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001921characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001922
1923static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001924string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001925{
1926 char *s = PyString_AS_STRING(self), *s_new;
1927 int i, n = PyString_GET_SIZE(self);
1928 int previous_is_cased = 0;
1929 PyObject *new;
1930
Guido van Rossum4c08d552000-03-10 22:55:18 +00001931 new = PyString_FromStringAndSize(NULL, n);
1932 if (new == NULL)
1933 return NULL;
1934 s_new = PyString_AsString(new);
1935 for (i = 0; i < n; i++) {
1936 int c = Py_CHARMASK(*s++);
1937 if (islower(c)) {
1938 if (!previous_is_cased)
1939 c = toupper(c);
1940 previous_is_cased = 1;
1941 } else if (isupper(c)) {
1942 if (previous_is_cased)
1943 c = tolower(c);
1944 previous_is_cased = 1;
1945 } else
1946 previous_is_cased = 0;
1947 *s_new++ = c;
1948 }
1949 return new;
1950}
1951
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001952PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953"S.capitalize() -> string\n\
1954\n\
1955Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001956capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957
1958static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001959string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960{
1961 char *s = PyString_AS_STRING(self), *s_new;
1962 int i, n = PyString_GET_SIZE(self);
1963 PyObject *new;
1964
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965 new = PyString_FromStringAndSize(NULL, n);
1966 if (new == NULL)
1967 return NULL;
1968 s_new = PyString_AsString(new);
1969 if (0 < n) {
1970 int c = Py_CHARMASK(*s++);
1971 if (islower(c))
1972 *s_new = toupper(c);
1973 else
1974 *s_new = c;
1975 s_new++;
1976 }
1977 for (i = 1; i < n; i++) {
1978 int c = Py_CHARMASK(*s++);
1979 if (isupper(c))
1980 *s_new = tolower(c);
1981 else
1982 *s_new = c;
1983 s_new++;
1984 }
1985 return new;
1986}
1987
1988
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001989PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990"S.count(sub[, start[, end]]) -> int\n\
1991\n\
1992Return the number of occurrences of substring sub in string\n\
1993S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001994interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995
1996static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001997string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001999 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000 int len = PyString_GET_SIZE(self), n;
2001 int i = 0, last = INT_MAX;
2002 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002003 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004
Guido van Rossumc6821402000-05-08 14:08:05 +00002005 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2006 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002008
Guido van Rossum4c08d552000-03-10 22:55:18 +00002009 if (PyString_Check(subobj)) {
2010 sub = PyString_AS_STRING(subobj);
2011 n = PyString_GET_SIZE(subobj);
2012 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002013#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002014 else if (PyUnicode_Check(subobj)) {
2015 int count;
2016 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2017 if (count == -1)
2018 return NULL;
2019 else
2020 return PyInt_FromLong((long) count);
2021 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002022#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002023 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2024 return NULL;
2025
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002026 string_adjust_indices(&i, &last, len);
2027
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028 m = last + 1 - n;
2029 if (n == 0)
2030 return PyInt_FromLong((long) (m-i));
2031
2032 r = 0;
2033 while (i < m) {
2034 if (!memcmp(s+i, sub, n)) {
2035 r++;
2036 i += n;
2037 } else {
2038 i++;
2039 }
2040 }
2041 return PyInt_FromLong((long) r);
2042}
2043
2044
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002045PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046"S.swapcase() -> string\n\
2047\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002048Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002049converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002050
2051static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002052string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053{
2054 char *s = PyString_AS_STRING(self), *s_new;
2055 int i, n = PyString_GET_SIZE(self);
2056 PyObject *new;
2057
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058 new = PyString_FromStringAndSize(NULL, n);
2059 if (new == NULL)
2060 return NULL;
2061 s_new = PyString_AsString(new);
2062 for (i = 0; i < n; i++) {
2063 int c = Py_CHARMASK(*s++);
2064 if (islower(c)) {
2065 *s_new = toupper(c);
2066 }
2067 else if (isupper(c)) {
2068 *s_new = tolower(c);
2069 }
2070 else
2071 *s_new = c;
2072 s_new++;
2073 }
2074 return new;
2075}
2076
2077
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002078PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079"S.translate(table [,deletechars]) -> string\n\
2080\n\
2081Return a copy of the string S, where all characters occurring\n\
2082in the optional argument deletechars are removed, and the\n\
2083remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002084translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085
2086static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002087string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089 register char *input, *output;
2090 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091 register int i, c, changed = 0;
2092 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002093 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094 int inlen, tablen, dellen = 0;
2095 PyObject *result;
2096 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002097 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002098
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099 if (!PyArg_ParseTuple(args, "O|O:translate",
2100 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002102
2103 if (PyString_Check(tableobj)) {
2104 table1 = PyString_AS_STRING(tableobj);
2105 tablen = PyString_GET_SIZE(tableobj);
2106 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002107#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002108 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002109 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002110 parameter; instead a mapping to None will cause characters
2111 to be deleted. */
2112 if (delobj != NULL) {
2113 PyErr_SetString(PyExc_TypeError,
2114 "deletions are implemented differently for unicode");
2115 return NULL;
2116 }
2117 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2118 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002119#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002120 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002122
2123 if (delobj != NULL) {
2124 if (PyString_Check(delobj)) {
2125 del_table = PyString_AS_STRING(delobj);
2126 dellen = PyString_GET_SIZE(delobj);
2127 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002128#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002129 else if (PyUnicode_Check(delobj)) {
2130 PyErr_SetString(PyExc_TypeError,
2131 "deletions are implemented differently for unicode");
2132 return NULL;
2133 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002134#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002135 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2136 return NULL;
2137
2138 if (tablen != 256) {
2139 PyErr_SetString(PyExc_ValueError,
2140 "translation table must be 256 characters long");
2141 return NULL;
2142 }
2143 }
2144 else {
2145 del_table = NULL;
2146 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147 }
2148
2149 table = table1;
2150 inlen = PyString_Size(input_obj);
2151 result = PyString_FromStringAndSize((char *)NULL, inlen);
2152 if (result == NULL)
2153 return NULL;
2154 output_start = output = PyString_AsString(result);
2155 input = PyString_AsString(input_obj);
2156
2157 if (dellen == 0) {
2158 /* If no deletions are required, use faster code */
2159 for (i = inlen; --i >= 0; ) {
2160 c = Py_CHARMASK(*input++);
2161 if (Py_CHARMASK((*output++ = table[c])) != c)
2162 changed = 1;
2163 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002164 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165 return result;
2166 Py_DECREF(result);
2167 Py_INCREF(input_obj);
2168 return input_obj;
2169 }
2170
2171 for (i = 0; i < 256; i++)
2172 trans_table[i] = Py_CHARMASK(table[i]);
2173
2174 for (i = 0; i < dellen; i++)
2175 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2176
2177 for (i = inlen; --i >= 0; ) {
2178 c = Py_CHARMASK(*input++);
2179 if (trans_table[c] != -1)
2180 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2181 continue;
2182 changed = 1;
2183 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002184 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185 Py_DECREF(result);
2186 Py_INCREF(input_obj);
2187 return input_obj;
2188 }
2189 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002190 if (inlen > 0)
2191 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192 return result;
2193}
2194
2195
2196/* What follows is used for implementing replace(). Perry Stoll. */
2197
/*
  mymemfind

  strstr-like search over arbitrary (not NUL-terminated) memory blocks.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its index into MEM, or -1 if there is none.
  A pattern longer than MEM can never match, so -1 is returned then too.
  The first byte of PAT is always inspected before memcmp() is tried.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* The pattern cannot start within the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        if (mem[pos] == pat[0] &&
            memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        ++pos;
    }
    return -1;
}
2223
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match.
  E.g. mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 gives 2
  as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;
    int at;

    for (hits = 0; len >= 0; ++hits) {
        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* Continue the search immediately after this match. */
        mem += at + pat_len;
        len -= at + pat_len;
    }
    return hits;
}
2247
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "replace all".  An empty PAT matches before every byte of
   STR and once at its end.

   The original string is returned when STR is empty, when PAT and SUB
   are both empty, when PAT is longer than STR, or when no replacement is
   to be made.  Otherwise, a new buffer is allocated here with
   PyMem_MALLOC and returned; the caller owns it.  The buffer holds
   exactly out_len bytes and is not NUL-terminated (except for the
   1-byte buffer returned for an empty result).

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (allocation failure, or the result
       length would not fit in an int).  No exception is set here.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
    if (count >= 0 && nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only a positive delta can overflow:
       when the result shrinks, nfound*pat_len never exceeds len. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;    /* len + nfound*delta would overflow int */

    new_len = len + nfound * delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        if (pat_len > 0) {
            for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                if (offset == -1)
                    break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset);
                str += offset + pat_len;
                len -= offset + pat_len;

                /* copy substitute into the output string */
                new_s += offset;
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
            }
            /* copy any remaining values into output string */
            if (len > 0)
                memcpy(new_s, str, len);
        }
        else {
            /* Empty pattern: emit SUB, then one input byte, repeatedly;
               after the last SUB, copy whatever input is left. */
            for (;;++str, --len) {
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
                if (--nfound <= 0) {
                    memcpy(new_s, str, len);
                    break;
                }
                *new_s++ = *str;
            }
        }
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2345
2346
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002347PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002348"S.replace (old, new[, maxsplit]) -> string\n\
2349\n\
2350Return a copy of string S with all occurrences of substring\n\
2351old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002352given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353
2354static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002355string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002357 const char *str = PyString_AS_STRING(self), *sub, *repl;
2358 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002359 const int len = PyString_GET_SIZE(self);
2360 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002361 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002363 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 if (!PyArg_ParseTuple(args, "OO|i:replace",
2366 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368
2369 if (PyString_Check(subobj)) {
2370 sub = PyString_AS_STRING(subobj);
2371 sub_len = PyString_GET_SIZE(subobj);
2372 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002373#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002375 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002376 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002377#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2379 return NULL;
2380
2381 if (PyString_Check(replobj)) {
2382 repl = PyString_AS_STRING(replobj);
2383 repl_len = PyString_GET_SIZE(replobj);
2384 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002385#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002386 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002387 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002389#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2391 return NULL;
2392
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394 if (new_s == NULL) {
2395 PyErr_NoMemory();
2396 return NULL;
2397 }
2398 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002399 if (PyString_CheckExact(self)) {
2400 /* we're returning another reference to self */
2401 new = (PyObject*)self;
2402 Py_INCREF(new);
2403 }
2404 else {
2405 new = PyString_FromStringAndSize(str, len);
2406 if (new == NULL)
2407 return NULL;
2408 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409 }
2410 else {
2411 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002412 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413 }
2414 return new;
2415}
2416
2417
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002418PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002419"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002420\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002421Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002423comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424
2425static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002426string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002430 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431 int plen;
2432 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002433 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002434 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002435
Guido van Rossumc6821402000-05-08 14:08:05 +00002436 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2437 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002438 return NULL;
2439 if (PyString_Check(subobj)) {
2440 prefix = PyString_AS_STRING(subobj);
2441 plen = PyString_GET_SIZE(subobj);
2442 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002443#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002444 else if (PyUnicode_Check(subobj)) {
2445 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002446 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002447 subobj, start, end, -1);
2448 if (rc == -1)
2449 return NULL;
2450 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002451 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002452 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002453#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002454 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455 return NULL;
2456
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002457 string_adjust_indices(&start, &end, len);
2458
2459 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002460 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002461
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002462 if (end-start >= plen)
2463 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2464 else
2465 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002466}
2467
2468
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002469PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002470"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002471\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002472Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002473optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002474comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475
2476static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002477string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002478{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002479 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002481 const char* suffix;
2482 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002484 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002485 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002486
Guido van Rossumc6821402000-05-08 14:08:05 +00002487 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2488 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002489 return NULL;
2490 if (PyString_Check(subobj)) {
2491 suffix = PyString_AS_STRING(subobj);
2492 slen = PyString_GET_SIZE(subobj);
2493 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002494#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002495 else if (PyUnicode_Check(subobj)) {
2496 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002497 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002498 subobj, start, end, +1);
2499 if (rc == -1)
2500 return NULL;
2501 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002502 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002503 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002504#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002505 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506 return NULL;
2507
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002508 string_adjust_indices(&start, &end, len);
2509
2510 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002511 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002512
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002513 if (end-slen > start)
2514 start = end - slen;
2515 if (end-start >= slen)
2516 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2517 else
2518 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002519}
2520
2521
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002522PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002523"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002524\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002525Encodes S using the codec registered for encoding. encoding defaults\n\
2526to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002527handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002528a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2529'xmlcharrefreplace' as well as any other name registered with\n\
2530codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002531
2532static PyObject *
2533string_encode(PyStringObject *self, PyObject *args)
2534{
2535 char *encoding = NULL;
2536 char *errors = NULL;
2537 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2538 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002539 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2540}
2541
2542
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002543PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002544"S.decode([encoding[,errors]]) -> object\n\
2545\n\
2546Decodes S using the codec registered for encoding. encoding defaults\n\
2547to the default encoding. errors may be given to set a different error\n\
2548handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002549a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2550as well as any other name registerd with codecs.register_error that is\n\
2551able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002552
2553static PyObject *
2554string_decode(PyStringObject *self, PyObject *args)
2555{
2556 char *encoding = NULL;
2557 char *errors = NULL;
2558 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2559 return NULL;
2560 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002561}
2562
2563
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002564PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002565"S.expandtabs([tabsize]) -> string\n\
2566\n\
2567Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002568If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569
2570static PyObject*
2571string_expandtabs(PyStringObject *self, PyObject *args)
2572{
2573 const char *e, *p;
2574 char *q;
2575 int i, j;
2576 PyObject *u;
2577 int tabsize = 8;
2578
2579 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2580 return NULL;
2581
Thomas Wouters7e474022000-07-16 12:04:32 +00002582 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002583 i = j = 0;
2584 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2585 for (p = PyString_AS_STRING(self); p < e; p++)
2586 if (*p == '\t') {
2587 if (tabsize > 0)
2588 j += tabsize - (j % tabsize);
2589 }
2590 else {
2591 j++;
2592 if (*p == '\n' || *p == '\r') {
2593 i += j;
2594 j = 0;
2595 }
2596 }
2597
2598 /* Second pass: create output string and fill it */
2599 u = PyString_FromStringAndSize(NULL, i + j);
2600 if (!u)
2601 return NULL;
2602
2603 j = 0;
2604 q = PyString_AS_STRING(u);
2605
2606 for (p = PyString_AS_STRING(self); p < e; p++)
2607 if (*p == '\t') {
2608 if (tabsize > 0) {
2609 i = tabsize - (j % tabsize);
2610 j += i;
2611 while (i--)
2612 *q++ = ' ';
2613 }
2614 }
2615 else {
2616 j++;
2617 *q++ = *p;
2618 if (*p == '\n' || *p == '\r')
2619 j = 0;
2620 }
2621
2622 return u;
2623}
2624
Tim Peters8fa5dd02001-09-12 02:18:30 +00002625static PyObject *
2626pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002627{
2628 PyObject *u;
2629
2630 if (left < 0)
2631 left = 0;
2632 if (right < 0)
2633 right = 0;
2634
Tim Peters8fa5dd02001-09-12 02:18:30 +00002635 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002636 Py_INCREF(self);
2637 return (PyObject *)self;
2638 }
2639
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002640 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641 left + PyString_GET_SIZE(self) + right);
2642 if (u) {
2643 if (left)
2644 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002645 memcpy(PyString_AS_STRING(u) + left,
2646 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002647 PyString_GET_SIZE(self));
2648 if (right)
2649 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2650 fill, right);
2651 }
2652
2653 return u;
2654}
2655
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002656PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002657"S.ljust(width) -> string\n"
2658"\n"
2659"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002660"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002661
2662static PyObject *
2663string_ljust(PyStringObject *self, PyObject *args)
2664{
2665 int width;
2666 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2667 return NULL;
2668
Tim Peters8fa5dd02001-09-12 02:18:30 +00002669 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002670 Py_INCREF(self);
2671 return (PyObject*) self;
2672 }
2673
2674 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2675}
2676
2677
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002678PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002679"S.rjust(width) -> string\n"
2680"\n"
2681"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002682"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002683
2684static PyObject *
2685string_rjust(PyStringObject *self, PyObject *args)
2686{
2687 int width;
2688 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2689 return NULL;
2690
Tim Peters8fa5dd02001-09-12 02:18:30 +00002691 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002692 Py_INCREF(self);
2693 return (PyObject*) self;
2694 }
2695
2696 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2697}
2698
2699
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002700PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002701"S.center(width) -> string\n"
2702"\n"
2703"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002704"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002705
2706static PyObject *
2707string_center(PyStringObject *self, PyObject *args)
2708{
2709 int marg, left;
2710 int width;
2711
2712 if (!PyArg_ParseTuple(args, "i:center", &width))
2713 return NULL;
2714
Tim Peters8fa5dd02001-09-12 02:18:30 +00002715 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002716 Py_INCREF(self);
2717 return (PyObject*) self;
2718 }
2719
2720 marg = width - PyString_GET_SIZE(self);
2721 left = marg / 2 + (marg & width & 1);
2722
2723 return pad(self, left, marg - left, ' ');
2724}
2725
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002726PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002727"S.zfill(width) -> string\n"
2728"\n"
2729"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002730"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002731
2732static PyObject *
2733string_zfill(PyStringObject *self, PyObject *args)
2734{
2735 int fill;
2736 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002737 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002738
2739 int width;
2740 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2741 return NULL;
2742
2743 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002744 if (PyString_CheckExact(self)) {
2745 Py_INCREF(self);
2746 return (PyObject*) self;
2747 }
2748 else
2749 return PyString_FromStringAndSize(
2750 PyString_AS_STRING(self),
2751 PyString_GET_SIZE(self)
2752 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002753 }
2754
2755 fill = width - PyString_GET_SIZE(self);
2756
2757 s = pad(self, fill, 0, '0');
2758
2759 if (s == NULL)
2760 return NULL;
2761
2762 p = PyString_AS_STRING(s);
2763 if (p[fill] == '+' || p[fill] == '-') {
2764 /* move sign to beginning of string */
2765 p[0] = p[fill];
2766 p[fill] = '0';
2767 }
2768
2769 return (PyObject*) s;
2770}
2771
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002772PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002773"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002774"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002775"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002776"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002777
2778static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002779string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002780{
Fred Drakeba096332000-07-09 07:04:36 +00002781 register const unsigned char *p
2782 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002783 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002784
Guido van Rossum4c08d552000-03-10 22:55:18 +00002785 /* Shortcut for single character strings */
2786 if (PyString_GET_SIZE(self) == 1 &&
2787 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002788 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002789
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002790 /* Special case for empty strings */
2791 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002792 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002793
Guido van Rossum4c08d552000-03-10 22:55:18 +00002794 e = p + PyString_GET_SIZE(self);
2795 for (; p < e; p++) {
2796 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002797 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002798 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002799 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002800}
2801
2802
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002803PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002804"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002805\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002806Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002807and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002808
2809static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002810string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002811{
Fred Drakeba096332000-07-09 07:04:36 +00002812 register const unsigned char *p
2813 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002814 register const unsigned char *e;
2815
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002816 /* Shortcut for single character strings */
2817 if (PyString_GET_SIZE(self) == 1 &&
2818 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002819 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002820
2821 /* Special case for empty strings */
2822 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002823 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002824
2825 e = p + PyString_GET_SIZE(self);
2826 for (; p < e; p++) {
2827 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002828 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002829 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002830 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002831}
2832
2833
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002834PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002835"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002836\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002837Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002838and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002839
2840static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002841string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002842{
Fred Drakeba096332000-07-09 07:04:36 +00002843 register const unsigned char *p
2844 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002845 register const unsigned char *e;
2846
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002847 /* Shortcut for single character strings */
2848 if (PyString_GET_SIZE(self) == 1 &&
2849 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002850 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002851
2852 /* Special case for empty strings */
2853 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002854 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002855
2856 e = p + PyString_GET_SIZE(self);
2857 for (; p < e; p++) {
2858 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002859 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002860 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002861 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002862}
2863
2864
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002865PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002866"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002868Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002869False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002870
2871static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002872string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002873{
Fred Drakeba096332000-07-09 07:04:36 +00002874 register const unsigned char *p
2875 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002876 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877
Guido van Rossum4c08d552000-03-10 22:55:18 +00002878 /* Shortcut for single character strings */
2879 if (PyString_GET_SIZE(self) == 1 &&
2880 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002881 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002882
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002883 /* Special case for empty strings */
2884 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002885 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002886
Guido van Rossum4c08d552000-03-10 22:55:18 +00002887 e = p + PyString_GET_SIZE(self);
2888 for (; p < e; p++) {
2889 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002890 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002891 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002892 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893}
2894
2895
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002896PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002897"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002898\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002899Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002900at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002901
2902static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002903string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002904{
Fred Drakeba096332000-07-09 07:04:36 +00002905 register const unsigned char *p
2906 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002907 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002908 int cased;
2909
Guido van Rossum4c08d552000-03-10 22:55:18 +00002910 /* Shortcut for single character strings */
2911 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002912 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002913
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002914 /* Special case for empty strings */
2915 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002916 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002917
Guido van Rossum4c08d552000-03-10 22:55:18 +00002918 e = p + PyString_GET_SIZE(self);
2919 cased = 0;
2920 for (; p < e; p++) {
2921 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002922 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002923 else if (!cased && islower(*p))
2924 cased = 1;
2925 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002926 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002927}
2928
2929
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002930PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002931"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002932\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002933Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002934at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002935
2936static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002937string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002938{
Fred Drakeba096332000-07-09 07:04:36 +00002939 register const unsigned char *p
2940 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002941 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002942 int cased;
2943
Guido van Rossum4c08d552000-03-10 22:55:18 +00002944 /* Shortcut for single character strings */
2945 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002946 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002947
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002948 /* Special case for empty strings */
2949 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002950 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002951
Guido van Rossum4c08d552000-03-10 22:55:18 +00002952 e = p + PyString_GET_SIZE(self);
2953 cased = 0;
2954 for (; p < e; p++) {
2955 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002956 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002957 else if (!cased && isupper(*p))
2958 cased = 1;
2959 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002960 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002961}
2962
2963
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002964PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002965"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002966\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002967Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002968may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002969ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002970
2971static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002972string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002973{
Fred Drakeba096332000-07-09 07:04:36 +00002974 register const unsigned char *p
2975 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002976 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002977 int cased, previous_is_cased;
2978
Guido van Rossum4c08d552000-03-10 22:55:18 +00002979 /* Shortcut for single character strings */
2980 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002981 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002982
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002983 /* Special case for empty strings */
2984 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002985 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002986
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987 e = p + PyString_GET_SIZE(self);
2988 cased = 0;
2989 previous_is_cased = 0;
2990 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002991 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002992
2993 if (isupper(ch)) {
2994 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002995 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002996 previous_is_cased = 1;
2997 cased = 1;
2998 }
2999 else if (islower(ch)) {
3000 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003001 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003002 previous_is_cased = 1;
3003 cased = 1;
3004 }
3005 else
3006 previous_is_cased = 0;
3007 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003008 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003009}
3010
3011
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003012PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003013"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003014\n\
3015Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003016Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003017is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003018
3019#define SPLIT_APPEND(data, left, right) \
3020 str = PyString_FromStringAndSize(data + left, right - left); \
3021 if (!str) \
3022 goto onError; \
3023 if (PyList_Append(list, str)) { \
3024 Py_DECREF(str); \
3025 goto onError; \
3026 } \
3027 else \
3028 Py_DECREF(str);
3029
3030static PyObject*
3031string_splitlines(PyStringObject *self, PyObject *args)
3032{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003033 register int i;
3034 register int j;
3035 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003036 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003037 PyObject *list;
3038 PyObject *str;
3039 char *data;
3040
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003041 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003042 return NULL;
3043
3044 data = PyString_AS_STRING(self);
3045 len = PyString_GET_SIZE(self);
3046
Guido van Rossum4c08d552000-03-10 22:55:18 +00003047 list = PyList_New(0);
3048 if (!list)
3049 goto onError;
3050
3051 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003052 int eol;
3053
Guido van Rossum4c08d552000-03-10 22:55:18 +00003054 /* Find a line and append it */
3055 while (i < len && data[i] != '\n' && data[i] != '\r')
3056 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003057
3058 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003059 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003060 if (i < len) {
3061 if (data[i] == '\r' && i + 1 < len &&
3062 data[i+1] == '\n')
3063 i += 2;
3064 else
3065 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003066 if (keepends)
3067 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003068 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003069 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070 j = i;
3071 }
3072 if (j < len) {
3073 SPLIT_APPEND(data, j, len);
3074 }
3075
3076 return list;
3077
3078 onError:
3079 Py_DECREF(list);
3080 return NULL;
3081}
3082
3083#undef SPLIT_APPEND
3084
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003085
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003086static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003087string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003088 /* Counterparts of the obsolete stropmodule functions; except
3089 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003090 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3091 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3092 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3093 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003094 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3095 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3096 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3097 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3098 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3099 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3100 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003101 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3102 capitalize__doc__},
3103 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3104 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3105 endswith__doc__},
3106 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3107 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3108 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3109 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3110 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3111 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3112 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3113 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3114 startswith__doc__},
3115 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3116 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3117 swapcase__doc__},
3118 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3119 translate__doc__},
3120 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3121 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3122 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3123 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3124 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3125 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3126 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3127 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3128 expandtabs__doc__},
3129 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3130 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003131 {NULL, NULL} /* sentinel */
3132};
3133
Jeremy Hylton938ace62002-07-17 16:30:39 +00003134static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003135str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3136
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003138string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003139{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003140 PyObject *x = NULL;
3141 static char *kwlist[] = {"object", 0};
3142
Guido van Rossumae960af2001-08-30 03:11:59 +00003143 if (type != &PyString_Type)
3144 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003145 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3146 return NULL;
3147 if (x == NULL)
3148 return PyString_FromString("");
3149 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003150}
3151
Guido van Rossumae960af2001-08-30 03:11:59 +00003152static PyObject *
3153str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3154{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003155 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003156 int n;
3157
3158 assert(PyType_IsSubtype(type, &PyString_Type));
3159 tmp = string_new(&PyString_Type, args, kwds);
3160 if (tmp == NULL)
3161 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003162 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003163 n = PyString_GET_SIZE(tmp);
3164 pnew = type->tp_alloc(type, n);
3165 if (pnew != NULL) {
3166 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003167 ((PyStringObject *)pnew)->ob_shash =
3168 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003169 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003170 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003171 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003172 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003173}
3174
Guido van Rossumcacfc072002-05-24 19:01:59 +00003175static PyObject *
3176basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3177{
3178 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003179 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003180 return NULL;
3181}
3182
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003183PyDoc_STRVAR(basestring_doc,
3184"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003185
3186PyTypeObject PyBaseString_Type = {
3187 PyObject_HEAD_INIT(&PyType_Type)
3188 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003189 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003190 0,
3191 0,
3192 0, /* tp_dealloc */
3193 0, /* tp_print */
3194 0, /* tp_getattr */
3195 0, /* tp_setattr */
3196 0, /* tp_compare */
3197 0, /* tp_repr */
3198 0, /* tp_as_number */
3199 0, /* tp_as_sequence */
3200 0, /* tp_as_mapping */
3201 0, /* tp_hash */
3202 0, /* tp_call */
3203 0, /* tp_str */
3204 0, /* tp_getattro */
3205 0, /* tp_setattro */
3206 0, /* tp_as_buffer */
3207 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3208 basestring_doc, /* tp_doc */
3209 0, /* tp_traverse */
3210 0, /* tp_clear */
3211 0, /* tp_richcompare */
3212 0, /* tp_weaklistoffset */
3213 0, /* tp_iter */
3214 0, /* tp_iternext */
3215 0, /* tp_methods */
3216 0, /* tp_members */
3217 0, /* tp_getset */
3218 &PyBaseObject_Type, /* tp_base */
3219 0, /* tp_dict */
3220 0, /* tp_descr_get */
3221 0, /* tp_descr_set */
3222 0, /* tp_dictoffset */
3223 0, /* tp_init */
3224 0, /* tp_alloc */
3225 basestring_new, /* tp_new */
3226 0, /* tp_free */
3227};
3228
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003229PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003230"str(object) -> string\n\
3231\n\
3232Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003233If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003234
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003235PyTypeObject PyString_Type = {
3236 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003237 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003238 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003239 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003240 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003241 (destructor)string_dealloc, /* tp_dealloc */
3242 (printfunc)string_print, /* tp_print */
3243 0, /* tp_getattr */
3244 0, /* tp_setattr */
3245 0, /* tp_compare */
3246 (reprfunc)string_repr, /* tp_repr */
3247 0, /* tp_as_number */
3248 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003249 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003250 (hashfunc)string_hash, /* tp_hash */
3251 0, /* tp_call */
3252 (reprfunc)string_str, /* tp_str */
3253 PyObject_GenericGetAttr, /* tp_getattro */
3254 0, /* tp_setattro */
3255 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00003256 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003257 string_doc, /* tp_doc */
3258 0, /* tp_traverse */
3259 0, /* tp_clear */
3260 (richcmpfunc)string_richcompare, /* tp_richcompare */
3261 0, /* tp_weaklistoffset */
3262 0, /* tp_iter */
3263 0, /* tp_iternext */
3264 string_methods, /* tp_methods */
3265 0, /* tp_members */
3266 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003267 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003268 0, /* tp_dict */
3269 0, /* tp_descr_get */
3270 0, /* tp_descr_set */
3271 0, /* tp_dictoffset */
3272 0, /* tp_init */
3273 0, /* tp_alloc */
3274 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003275 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003276};
3277
3278void
Fred Drakeba096332000-07-09 07:04:36 +00003279PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003280{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003281 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003282 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003283 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003284 if (w == NULL || !PyString_Check(*pv)) {
3285 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003286 *pv = NULL;
3287 return;
3288 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003289 v = string_concat((PyStringObject *) *pv, w);
3290 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003291 *pv = v;
3292}
3293
Guido van Rossum013142a1994-08-30 08:19:36 +00003294void
Fred Drakeba096332000-07-09 07:04:36 +00003295PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003296{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003297 PyString_Concat(pv, w);
3298 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003299}
3300
3301
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003302/* The following function breaks the notion that strings are immutable:
3303 it changes the size of a string. We get away with this only if there
3304 is only one module referencing the object. You can also think of it
3305 as creating a new string object and destroying the old one, only
3306 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003307 already be known to some other part of the code...
3308 Note that if there's not enough memory to resize the string, the original
3309 string object at *pv is deallocated, *pv is set to NULL, an "out of
3310 memory" exception is set, and -1 is returned. Else (on success) 0 is
3311 returned, and the value in *pv may or may not be the same as on input.
3312 As always, an extra byte is allocated for a trailing \0 byte (newsize
3313 does *not* include that), and a trailing \0 byte is stored.
3314*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003315
3316int
Fred Drakeba096332000-07-09 07:04:36 +00003317_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003318{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003319 register PyObject *v;
3320 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003321 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003322 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003323 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003324 Py_DECREF(v);
3325 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003326 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003327 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003328 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003329 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003330 _Py_ForgetReference(v);
3331 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003332 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003333 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003334 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003335 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003336 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003337 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003338 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003339 _Py_NewReference(*pv);
3340 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003341 sv->ob_size = newsize;
3342 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003343 return 0;
3344}
Guido van Rossume5372401993-03-16 12:15:04 +00003345
3346/* Helpers for formatstring */
3347
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003348static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003349getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003350{
3351 int argidx = *p_argidx;
3352 if (argidx < arglen) {
3353 (*p_argidx)++;
3354 if (arglen < 0)
3355 return args;
3356 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003357 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003358 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003359 PyErr_SetString(PyExc_TypeError,
3360 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003361 return NULL;
3362}
3363
/* Bit flags recording which flag characters appeared in a format
 * specifier; they are OR-ed together into one int.
 */
#define F_LJUST	(1<<0)	/* '-' */
#define F_SIGN	(1<<1)	/* '+' */
#define F_BLANK	(1<<2)	/* ' ' */
#define F_ALT	(1<<3)	/* '#' */
#define F_ZERO	(1<<4)	/* '0' */
3376
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003377static int
Fred Drakeba096332000-07-09 07:04:36 +00003378formatfloat(char *buf, size_t buflen, int flags,
3379 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003380{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003381 /* fmt = '%#.' + `prec` + `type`
3382 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003383 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003384 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003385 x = PyFloat_AsDouble(v);
3386 if (x == -1.0 && PyErr_Occurred()) {
3387 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003388 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003389 }
Guido van Rossume5372401993-03-16 12:15:04 +00003390 if (prec < 0)
3391 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003392 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3393 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003394 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3395 (flags&F_ALT) ? "#" : "",
3396 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003397 /* worst case length calc to ensure no buffer overrun:
3398 fmt = %#.<prec>g
3399 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003400 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003401 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3402 If prec=0 the effective precision is 1 (the leading digit is
3403 always given), therefore increase by one to 10+prec. */
3404 if (buflen <= (size_t)10 + (size_t)prec) {
3405 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003406 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003407 return -1;
3408 }
Tim Peters885d4572001-11-28 20:27:42 +00003409 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003410 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003411}
3412
Tim Peters38fd5b62000-09-21 05:43:11 +00003413/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3414 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3415 * Python's regular ints.
3416 * Return value: a new PyString*, or NULL if error.
3417 * . *pbuf is set to point into it,
3418 * *plen set to the # of chars following that.
3419 * Caller must decref it when done using pbuf.
3420 * The string starting at *pbuf is of the form
3421 * "-"? ("0x" | "0X")? digit+
3422 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003423 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003424 * There will be at least prec digits, zero-filled on the left if
3425 * necessary to get that many.
3426 * val object to be converted
3427 * flags bitmask of format flags; only F_ALT is looked at
3428 * prec minimum number of digits; 0-fill on left if needed
3429 * type a character in [duoxX]; u acts the same as d
3430 *
3431 * CAUTION: o, x and X conversions on regular ints can never
3432 * produce a '-' sign, but can for Python's unbounded ints.
3433 */
3434PyObject*
3435_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3436 char **pbuf, int *plen)
3437{
3438 PyObject *result = NULL;
3439 char *buf;
3440 int i;
3441 int sign; /* 1 if '-', else 0 */
3442 int len; /* number of characters */
3443 int numdigits; /* len == numnondigits + numdigits */
3444 int numnondigits = 0;
3445
3446 switch (type) {
3447 case 'd':
3448 case 'u':
3449 result = val->ob_type->tp_str(val);
3450 break;
3451 case 'o':
3452 result = val->ob_type->tp_as_number->nb_oct(val);
3453 break;
3454 case 'x':
3455 case 'X':
3456 numnondigits = 2;
3457 result = val->ob_type->tp_as_number->nb_hex(val);
3458 break;
3459 default:
3460 assert(!"'type' not in [duoxX]");
3461 }
3462 if (!result)
3463 return NULL;
3464
3465 /* To modify the string in-place, there can only be one reference. */
3466 if (result->ob_refcnt != 1) {
3467 PyErr_BadInternalCall();
3468 return NULL;
3469 }
3470 buf = PyString_AsString(result);
3471 len = PyString_Size(result);
3472 if (buf[len-1] == 'L') {
3473 --len;
3474 buf[len] = '\0';
3475 }
3476 sign = buf[0] == '-';
3477 numnondigits += sign;
3478 numdigits = len - numnondigits;
3479 assert(numdigits > 0);
3480
Tim Petersfff53252001-04-12 18:38:48 +00003481 /* Get rid of base marker unless F_ALT */
3482 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003483 /* Need to skip 0x, 0X or 0. */
3484 int skipped = 0;
3485 switch (type) {
3486 case 'o':
3487 assert(buf[sign] == '0');
3488 /* If 0 is only digit, leave it alone. */
3489 if (numdigits > 1) {
3490 skipped = 1;
3491 --numdigits;
3492 }
3493 break;
3494 case 'x':
3495 case 'X':
3496 assert(buf[sign] == '0');
3497 assert(buf[sign + 1] == 'x');
3498 skipped = 2;
3499 numnondigits -= 2;
3500 break;
3501 }
3502 if (skipped) {
3503 buf += skipped;
3504 len -= skipped;
3505 if (sign)
3506 buf[0] = '-';
3507 }
3508 assert(len == numnondigits + numdigits);
3509 assert(numdigits > 0);
3510 }
3511
3512 /* Fill with leading zeroes to meet minimum width. */
3513 if (prec > numdigits) {
3514 PyObject *r1 = PyString_FromStringAndSize(NULL,
3515 numnondigits + prec);
3516 char *b1;
3517 if (!r1) {
3518 Py_DECREF(result);
3519 return NULL;
3520 }
3521 b1 = PyString_AS_STRING(r1);
3522 for (i = 0; i < numnondigits; ++i)
3523 *b1++ = *buf++;
3524 for (i = 0; i < prec - numdigits; i++)
3525 *b1++ = '0';
3526 for (i = 0; i < numdigits; i++)
3527 *b1++ = *buf++;
3528 *b1 = '\0';
3529 Py_DECREF(result);
3530 result = r1;
3531 buf = PyString_AS_STRING(result);
3532 len = numnondigits + prec;
3533 }
3534
3535 /* Fix up case for hex conversions. */
3536 switch (type) {
3537 case 'x':
3538 /* Need to convert all upper case letters to lower case. */
3539 for (i = 0; i < len; i++)
3540 if (buf[i] >= 'A' && buf[i] <= 'F')
3541 buf[i] += 'a'-'A';
3542 break;
3543 case 'X':
3544 /* Need to convert 0x to 0X (and -0x to -0X). */
3545 if (buf[sign + 1] == 'x')
3546 buf[sign + 1] = 'X';
3547 break;
3548 }
3549 *pbuf = buf;
3550 *plen = len;
3551 return result;
3552}
3553
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003554static int
Fred Drakeba096332000-07-09 07:04:36 +00003555formatint(char *buf, size_t buflen, int flags,
3556 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003557{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003558 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003559 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3560 + 1 + 1 = 24 */
3561 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003562 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003563
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003564 x = PyInt_AsLong(v);
3565 if (x == -1 && PyErr_Occurred()) {
3566 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003567 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003568 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003569 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003570 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003571 "%u/%o/%x/%X of negative int will return "
3572 "a signed string in Python 2.4 and up") < 0)
3573 return -1;
3574 }
Guido van Rossume5372401993-03-16 12:15:04 +00003575 if (prec < 0)
3576 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003577
3578 if ((flags & F_ALT) &&
3579 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003580 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003581 * of issues that cause pain:
3582 * - when 0 is being converted, the C standard leaves off
3583 * the '0x' or '0X', which is inconsistent with other
3584 * %#x/%#X conversions and inconsistent with Python's
3585 * hex() function
3586 * - there are platforms that violate the standard and
3587 * convert 0 with the '0x' or '0X'
3588 * (Metrowerks, Compaq Tru64)
3589 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003590 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003591 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003592 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003593 * We can achieve the desired consistency by inserting our
3594 * own '0x' or '0X' prefix, and substituting %x/%X in place
3595 * of %#x/%#X.
3596 *
3597 * Note that this is the same approach as used in
3598 * formatint() in unicodeobject.c
3599 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003600 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003601 type, prec, type);
3602 }
3603 else {
3604 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003605 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003606 prec, type);
3607 }
3608
Tim Peters38fd5b62000-09-21 05:43:11 +00003609 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003610 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3611 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003612 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003613 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003614 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003615 return -1;
3616 }
Tim Peters885d4572001-11-28 20:27:42 +00003617 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003618 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003619}
3620
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003621static int
Fred Drakeba096332000-07-09 07:04:36 +00003622formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003623{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003624 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003625 if (PyString_Check(v)) {
3626 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003627 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003628 }
3629 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003630 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003631 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003632 }
3633 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003634 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003635}
3636
Guido van Rossum013142a1994-08-30 08:19:36 +00003637
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003647
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003648PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003649PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003650{
3651 char *fmt, *res;
3652 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003653 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003654 PyObject *result, *orig_args;
3655#ifdef Py_USING_UNICODE
3656 PyObject *v, *w;
3657#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003658 PyObject *dict = NULL;
3659 if (format == NULL || !PyString_Check(format) || args == NULL) {
3660 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003661 return NULL;
3662 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003663 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003664 fmt = PyString_AS_STRING(format);
3665 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003666 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003667 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003668 if (result == NULL)
3669 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003670 res = PyString_AsString(result);
3671 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003672 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003673 argidx = 0;
3674 }
3675 else {
3676 arglen = -1;
3677 argidx = -2;
3678 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003679 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003680 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003681 while (--fmtcnt >= 0) {
3682 if (*fmt != '%') {
3683 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003684 rescnt = fmtcnt + 100;
3685 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003686 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003687 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003688 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003689 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003690 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003691 }
3692 *res++ = *fmt++;
3693 }
3694 else {
3695 /* Got a format specifier */
3696 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003697 int width = -1;
3698 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003699 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003700 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003701 PyObject *v = NULL;
3702 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003703 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003704 int sign;
3705 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003706 char formatbuf[FORMATBUFLEN];
3707 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003708#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003709 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003710 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003711#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003712
Guido van Rossumda9c2711996-12-05 21:58:58 +00003713 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003714 if (*fmt == '(') {
3715 char *keystart;
3716 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003717 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003718 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003719
3720 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003721 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003722 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003723 goto error;
3724 }
3725 ++fmt;
3726 --fmtcnt;
3727 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003728 /* Skip over balanced parentheses */
3729 while (pcount > 0 && --fmtcnt >= 0) {
3730 if (*fmt == ')')
3731 --pcount;
3732 else if (*fmt == '(')
3733 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003734 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003735 }
3736 keylen = fmt - keystart - 1;
3737 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003738 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003739 "incomplete format key");
3740 goto error;
3741 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003742 key = PyString_FromStringAndSize(keystart,
3743 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003744 if (key == NULL)
3745 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003746 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003747 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003748 args_owned = 0;
3749 }
3750 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003751 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003752 if (args == NULL) {
3753 goto error;
3754 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003755 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003756 arglen = -1;
3757 argidx = -2;
3758 }
Guido van Rossume5372401993-03-16 12:15:04 +00003759 while (--fmtcnt >= 0) {
3760 switch (c = *fmt++) {
3761 case '-': flags |= F_LJUST; continue;
3762 case '+': flags |= F_SIGN; continue;
3763 case ' ': flags |= F_BLANK; continue;
3764 case '#': flags |= F_ALT; continue;
3765 case '0': flags |= F_ZERO; continue;
3766 }
3767 break;
3768 }
3769 if (c == '*') {
3770 v = getnextarg(args, arglen, &argidx);
3771 if (v == NULL)
3772 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003773 if (!PyInt_Check(v)) {
3774 PyErr_SetString(PyExc_TypeError,
3775 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003776 goto error;
3777 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003778 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003779 if (width < 0) {
3780 flags |= F_LJUST;
3781 width = -width;
3782 }
Guido van Rossume5372401993-03-16 12:15:04 +00003783 if (--fmtcnt >= 0)
3784 c = *fmt++;
3785 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003786 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003787 width = c - '0';
3788 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003789 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003790 if (!isdigit(c))
3791 break;
3792 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003793 PyErr_SetString(
3794 PyExc_ValueError,
3795 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003796 goto error;
3797 }
3798 width = width*10 + (c - '0');
3799 }
3800 }
3801 if (c == '.') {
3802 prec = 0;
3803 if (--fmtcnt >= 0)
3804 c = *fmt++;
3805 if (c == '*') {
3806 v = getnextarg(args, arglen, &argidx);
3807 if (v == NULL)
3808 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003809 if (!PyInt_Check(v)) {
3810 PyErr_SetString(
3811 PyExc_TypeError,
3812 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003813 goto error;
3814 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003815 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003816 if (prec < 0)
3817 prec = 0;
3818 if (--fmtcnt >= 0)
3819 c = *fmt++;
3820 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003821 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003822 prec = c - '0';
3823 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003824 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003825 if (!isdigit(c))
3826 break;
3827 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003828 PyErr_SetString(
3829 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003830 "prec too big");
3831 goto error;
3832 }
3833 prec = prec*10 + (c - '0');
3834 }
3835 }
3836 } /* prec */
3837 if (fmtcnt >= 0) {
3838 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003839 if (--fmtcnt >= 0)
3840 c = *fmt++;
3841 }
3842 }
3843 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003844 PyErr_SetString(PyExc_ValueError,
3845 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003846 goto error;
3847 }
3848 if (c != '%') {
3849 v = getnextarg(args, arglen, &argidx);
3850 if (v == NULL)
3851 goto error;
3852 }
3853 sign = 0;
3854 fill = ' ';
3855 switch (c) {
3856 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003857 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003858 len = 1;
3859 break;
3860 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003861#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003862 if (PyUnicode_Check(v)) {
3863 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003864 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003865 goto unicode;
3866 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003867#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00003868 /* Fall through */
3869 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003870 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003871 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003872 else
3873 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003874 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003875 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003876 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00003877 /* XXX Note: this should never happen,
3878 since PyObject_Repr() and
3879 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003880 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00003881 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003882 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003883 goto error;
3884 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003885 pbuf = PyString_AS_STRING(temp);
3886 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003887 if (prec >= 0 && len > prec)
3888 len = prec;
3889 break;
3890 case 'i':
3891 case 'd':
3892 case 'u':
3893 case 'o':
3894 case 'x':
3895 case 'X':
3896 if (c == 'i')
3897 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003898 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003899 temp = _PyString_FormatLong(v, flags,
3900 prec, c, &pbuf, &len);
3901 if (!temp)
3902 goto error;
3903 /* unbounded ints can always produce
3904 a sign character! */
3905 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003906 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003907 else {
3908 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003909 len = formatint(pbuf,
3910 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003911 flags, prec, c, v);
3912 if (len < 0)
3913 goto error;
3914 /* only d conversion is signed */
3915 sign = c == 'd';
3916 }
3917 if (flags & F_ZERO)
3918 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003919 break;
3920 case 'e':
3921 case 'E':
3922 case 'f':
3923 case 'g':
3924 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003925 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003926 len = formatfloat(pbuf, sizeof(formatbuf),
3927 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003928 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003929 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003930 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003931 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003932 fill = '0';
3933 break;
3934 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003935 pbuf = formatbuf;
3936 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003937 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003938 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003939 break;
3940 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003941 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003942 "unsupported format character '%c' (0x%x) "
3943 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00003944 c, c,
3945 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003946 goto error;
3947 }
3948 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003949 if (*pbuf == '-' || *pbuf == '+') {
3950 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003951 len--;
3952 }
3953 else if (flags & F_SIGN)
3954 sign = '+';
3955 else if (flags & F_BLANK)
3956 sign = ' ';
3957 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003958 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003959 }
3960 if (width < len)
3961 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003962 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003963 reslen -= rescnt;
3964 rescnt = width + fmtcnt + 100;
3965 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003966 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003967 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003968 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003969 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003970 }
3971 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003972 if (fill != ' ')
3973 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003974 rescnt--;
3975 if (width > len)
3976 width--;
3977 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003978 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3979 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003980 assert(pbuf[1] == c);
3981 if (fill != ' ') {
3982 *res++ = *pbuf++;
3983 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003984 }
Tim Petersfff53252001-04-12 18:38:48 +00003985 rescnt -= 2;
3986 width -= 2;
3987 if (width < 0)
3988 width = 0;
3989 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003990 }
3991 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003992 do {
3993 --rescnt;
3994 *res++ = fill;
3995 } while (--width > len);
3996 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003997 if (fill == ' ') {
3998 if (sign)
3999 *res++ = sign;
4000 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004001 (c == 'x' || c == 'X')) {
4002 assert(pbuf[0] == '0');
4003 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004004 *res++ = *pbuf++;
4005 *res++ = *pbuf++;
4006 }
4007 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004008 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004009 res += len;
4010 rescnt -= len;
4011 while (--width >= len) {
4012 --rescnt;
4013 *res++ = ' ';
4014 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004015 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004016 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004017 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004018 goto error;
4019 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004020 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004021 } /* '%' */
4022 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004023 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004024 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004025 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004026 goto error;
4027 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004028 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004029 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004030 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004031 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004032 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004033
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004034#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004035 unicode:
4036 if (args_owned) {
4037 Py_DECREF(args);
4038 args_owned = 0;
4039 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004040 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004041 if (PyTuple_Check(orig_args) && argidx > 0) {
4042 PyObject *v;
4043 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4044 v = PyTuple_New(n);
4045 if (v == NULL)
4046 goto error;
4047 while (--n >= 0) {
4048 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4049 Py_INCREF(w);
4050 PyTuple_SET_ITEM(v, n, w);
4051 }
4052 args = v;
4053 } else {
4054 Py_INCREF(orig_args);
4055 args = orig_args;
4056 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004057 args_owned = 1;
4058 /* Take what we have of the result and let the Unicode formatting
4059 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004060 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004061 if (_PyString_Resize(&result, rescnt))
4062 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004063 fmtcnt = PyString_GET_SIZE(format) - \
4064 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004065 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4066 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004067 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004068 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004069 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004070 if (v == NULL)
4071 goto error;
4072 /* Paste what we have (result) to what the Unicode formatting
4073 function returned (v) and return the result (or error) */
4074 w = PyUnicode_Concat(result, v);
4075 Py_DECREF(result);
4076 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004077 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004078 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004079#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004080
Guido van Rossume5372401993-03-16 12:15:04 +00004081 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004082 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004083 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004084 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004085 }
Guido van Rossume5372401993-03-16 12:15:04 +00004086 return NULL;
4087}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004088
Guido van Rossum2a61e741997-01-18 07:55:05 +00004089void
Fred Drakeba096332000-07-09 07:04:36 +00004090PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004091{
4092 register PyStringObject *s = (PyStringObject *)(*p);
4093 PyObject *t;
4094 if (s == NULL || !PyString_Check(s))
4095 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004096 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004097 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004098 if (interned == NULL) {
4099 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004100 if (interned == NULL) {
4101 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004102 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004103 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004104 }
4105 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4106 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004107 Py_DECREF(*p);
4108 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004109 return;
4110 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004111 /* Ensure that only true string objects appear in the intern dict */
4112 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004113 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4114 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004115 if (t == NULL) {
4116 PyErr_Clear();
4117 return;
Tim Peters111f6092001-09-12 07:54:51 +00004118 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004119 } else {
4120 t = (PyObject*) s;
4121 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004122 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004123
4124 if (PyDict_SetItem(interned, t, t) == 0) {
4125 /* The two references in interned are not counted by
4126 refcnt. The string deallocator will take care of this */
4127 ((PyObject *)t)->ob_refcnt-=2;
4128 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4129 Py_DECREF(*p);
4130 *p = t;
4131 return;
4132 }
4133 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004134 PyErr_Clear();
4135}
4136
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004137void
4138PyString_InternImmortal(PyObject **p)
4139{
4140 PyString_InternInPlace(p);
4141 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4142 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4143 Py_INCREF(*p);
4144 }
4145}
4146
Guido van Rossum2a61e741997-01-18 07:55:05 +00004147
4148PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004149PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004150{
4151 PyObject *s = PyString_FromString(cp);
4152 if (s == NULL)
4153 return NULL;
4154 PyString_InternInPlace(&s);
4155 return s;
4156}
4157
Guido van Rossum8cf04761997-08-02 02:57:45 +00004158void
Fred Drakeba096332000-07-09 07:04:36 +00004159PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004160{
4161 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004162 for (i = 0; i < UCHAR_MAX + 1; i++) {
4163 Py_XDECREF(characters[i]);
4164 characters[i] = NULL;
4165 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004166 Py_XDECREF(nullstring);
4167 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004168}
Barry Warsawa903ad982001-02-23 16:40:48 +00004169
Barry Warsawa903ad982001-02-23 16:40:48 +00004170void _Py_ReleaseInternedStrings(void)
4171{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004172 PyObject *keys;
4173 PyStringObject *s;
4174 int i, n;
4175
4176 if (interned == NULL || !PyDict_Check(interned))
4177 return;
4178 keys = PyDict_Keys(interned);
4179 if (keys == NULL || !PyList_Check(keys)) {
4180 PyErr_Clear();
4181 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004182 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004183
4184 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4185 detector, interned strings are not forcibly deallocated; rather, we
4186 give them their stolen references back, and then clear and DECREF
4187 the interned dict. */
4188
4189 fprintf(stderr, "releasing interned strings\n");
4190 n = PyList_GET_SIZE(keys);
4191 for (i = 0; i < n; i++) {
4192 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4193 switch (s->ob_sstate) {
4194 case SSTATE_NOT_INTERNED:
4195 /* XXX Shouldn't happen */
4196 break;
4197 case SSTATE_INTERNED_IMMORTAL:
4198 s->ob_refcnt += 1;
4199 break;
4200 case SSTATE_INTERNED_MORTAL:
4201 s->ob_refcnt += 2;
4202 break;
4203 default:
4204 Py_FatalError("Inconsistent interned string state.");
4205 }
4206 s->ob_sstate = SSTATE_NOT_INTERNED;
4207 }
4208 Py_DECREF(keys);
4209 PyDict_Clear(interned);
4210 Py_DECREF(interned);
4211 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004212}