blob: c11a362d25133500f96cad84538f37ebf1fdfbfa [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
26static PyObject *interned;
27
28
Martin v. Löwisfed24052002-10-07 13:55:50 +000029#if defined(HAVE_MBTOWC) && defined(HAVE_WCHAR_H)
30# define PRINT_MULTIBYTE_STRING
31# include <locale.h>
32# include <wchar.h>
33# if defined(HAVE_ISWPRINT)
34# define _isprint iswprint
35# else
36# define _isprint isprint
37# endif
38#endif
39
40static const char *hexchars = "0123456789abcdef";
41
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000069PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000070{
Tim Peters9e897f42001-05-09 07:37:07 +000071 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 if (size == 0 && (op = nullstring) != NULL) {
73#ifdef COUNT_ALLOCS
74 null_strings++;
75#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 Py_INCREF(op);
77 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000079 if (size == 1 && str != NULL &&
80 (op = characters[*str & UCHAR_MAX]) != NULL)
81 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082#ifdef COUNT_ALLOCS
83 one_strings++;
84#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000085 Py_INCREF(op);
86 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000088
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000089 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000091 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000092 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000094 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000096 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000097 if (str != NULL)
98 memcpy(op->ob_sval, str, size);
99 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +0000100 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000101 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000102 PyObject *t = (PyObject *)op;
103 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000104 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000105 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000106 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000107 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +0000108 PyObject *t = (PyObject *)op;
109 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000110 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000111 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000112 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000115}
116
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000118PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000119{
Tim Peters62de65b2001-12-06 20:29:32 +0000120 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000121 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000122
123 assert(str != NULL);
124 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000125 if (size > INT_MAX) {
126 PyErr_SetString(PyExc_OverflowError,
127 "string is too long for a Python string");
128 return NULL;
129 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 if (size == 0 && (op = nullstring) != NULL) {
131#ifdef COUNT_ALLOCS
132 null_strings++;
133#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000134 Py_INCREF(op);
135 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000136 }
137 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
138#ifdef COUNT_ALLOCS
139 one_strings++;
140#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000141 Py_INCREF(op);
142 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000144
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000145 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000146 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000147 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000148 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000150 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000151 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000152 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000153 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000154 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000155 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000156 PyObject *t = (PyObject *)op;
157 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000158 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000159 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000160 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000161 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000162 PyObject *t = (PyObject *)op;
163 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000164 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000165 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000166 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000167 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000168 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000169}
170
Barry Warsawdadace02001-08-24 18:32:06 +0000171PyObject *
172PyString_FromFormatV(const char *format, va_list vargs)
173{
Tim Petersc15c4f12001-10-02 21:32:07 +0000174 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000175 int n = 0;
176 const char* f;
177 char *s;
178 PyObject* string;
179
Tim Petersc15c4f12001-10-02 21:32:07 +0000180#ifdef VA_LIST_IS_ARRAY
181 memcpy(count, vargs, sizeof(va_list));
182#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000183#ifdef __va_copy
184 __va_copy(count, vargs);
185#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000186 count = vargs;
187#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000188#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000189 /* step 1: figure out how large a buffer we need */
190 for (f = format; *f; f++) {
191 if (*f == '%') {
192 const char* p = f;
193 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
194 ;
195
196 /* skip the 'l' in %ld, since it doesn't change the
197 width. although only %d is supported (see
198 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000200 if (*f == 'l' && *(f+1) == 'd')
201 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000202
Barry Warsawdadace02001-08-24 18:32:06 +0000203 switch (*f) {
204 case 'c':
205 (void)va_arg(count, int);
206 /* fall through... */
207 case '%':
208 n++;
209 break;
210 case 'd': case 'i': case 'x':
211 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000212 /* 20 bytes is enough to hold a 64-bit
213 integer. Decimal takes the most space.
214 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000215 n += 20;
216 break;
217 case 's':
218 s = va_arg(count, char*);
219 n += strlen(s);
220 break;
221 case 'p':
222 (void) va_arg(count, int);
223 /* maximum 64-bit pointer representation:
224 * 0xffffffffffffffff
225 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000226 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000227 */
228 n += 19;
229 break;
230 default:
231 /* if we stumble upon an unknown
232 formatting code, copy the rest of
233 the format string to the output
234 string. (we cannot just skip the
235 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000236 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000237 n += strlen(p);
238 goto expand;
239 }
240 } else
241 n++;
242 }
243 expand:
244 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000245 /* Since we've analyzed how much space we need for the worst case,
246 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000247 string = PyString_FromStringAndSize(NULL, n);
248 if (!string)
249 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000250
Barry Warsawdadace02001-08-24 18:32:06 +0000251 s = PyString_AsString(string);
252
253 for (f = format; *f; f++) {
254 if (*f == '%') {
255 const char* p = f++;
256 int i, longflag = 0;
257 /* parse the width.precision part (we're only
258 interested in the precision value, if any) */
259 n = 0;
260 while (isdigit(Py_CHARMASK(*f)))
261 n = (n*10) + *f++ - '0';
262 if (*f == '.') {
263 f++;
264 n = 0;
265 while (isdigit(Py_CHARMASK(*f)))
266 n = (n*10) + *f++ - '0';
267 }
268 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
269 f++;
270 /* handle the long flag, but only for %ld. others
271 can be added when necessary. */
272 if (*f == 'l' && *(f+1) == 'd') {
273 longflag = 1;
274 ++f;
275 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000276
Barry Warsawdadace02001-08-24 18:32:06 +0000277 switch (*f) {
278 case 'c':
279 *s++ = va_arg(vargs, int);
280 break;
281 case 'd':
282 if (longflag)
283 sprintf(s, "%ld", va_arg(vargs, long));
284 else
285 sprintf(s, "%d", va_arg(vargs, int));
286 s += strlen(s);
287 break;
288 case 'i':
289 sprintf(s, "%i", va_arg(vargs, int));
290 s += strlen(s);
291 break;
292 case 'x':
293 sprintf(s, "%x", va_arg(vargs, int));
294 s += strlen(s);
295 break;
296 case 's':
297 p = va_arg(vargs, char*);
298 i = strlen(p);
299 if (n > 0 && i > n)
300 i = n;
301 memcpy(s, p, i);
302 s += i;
303 break;
304 case 'p':
305 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000306 /* %p is ill-defined: ensure leading 0x. */
307 if (s[1] == 'X')
308 s[1] = 'x';
309 else if (s[1] != 'x') {
310 memmove(s+2, s, strlen(s)+1);
311 s[0] = '0';
312 s[1] = 'x';
313 }
Barry Warsawdadace02001-08-24 18:32:06 +0000314 s += strlen(s);
315 break;
316 case '%':
317 *s++ = '%';
318 break;
319 default:
320 strcpy(s, p);
321 s += strlen(s);
322 goto end;
323 }
324 } else
325 *s++ = *f;
326 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000327
Barry Warsawdadace02001-08-24 18:32:06 +0000328 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000329 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000330 return string;
331}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000334PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000335{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000336 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000337 va_list vargs;
338
339#ifdef HAVE_STDARG_PROTOTYPES
340 va_start(vargs, format);
341#else
342 va_start(vargs);
343#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000344 ret = PyString_FromFormatV(format, vargs);
345 va_end(vargs);
346 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000347}
348
349
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350PyObject *PyString_Decode(const char *s,
351 int size,
352 const char *encoding,
353 const char *errors)
354{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000355 PyObject *v, *str;
356
357 str = PyString_FromStringAndSize(s, size);
358 if (str == NULL)
359 return NULL;
360 v = PyString_AsDecodedString(str, encoding, errors);
361 Py_DECREF(str);
362 return v;
363}
364
365PyObject *PyString_AsDecodedObject(PyObject *str,
366 const char *encoding,
367 const char *errors)
368{
369 PyObject *v;
370
371 if (!PyString_Check(str)) {
372 PyErr_BadArgument();
373 goto onError;
374 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000375
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000376 if (encoding == NULL) {
377#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000378 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000379#else
380 PyErr_SetString(PyExc_ValueError, "no encoding specified");
381 goto onError;
382#endif
383 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384
385 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000386 v = PyCodec_Decode(str, encoding, errors);
387 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389
390 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000391
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393 return NULL;
394}
395
396PyObject *PyString_AsDecodedString(PyObject *str,
397 const char *encoding,
398 const char *errors)
399{
400 PyObject *v;
401
402 v = PyString_AsDecodedObject(str, encoding, errors);
403 if (v == NULL)
404 goto onError;
405
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000406#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000407 /* Convert Unicode to a string using the default encoding */
408 if (PyUnicode_Check(v)) {
409 PyObject *temp = v;
410 v = PyUnicode_AsEncodedString(v, NULL, NULL);
411 Py_DECREF(temp);
412 if (v == NULL)
413 goto onError;
414 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000415#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000416 if (!PyString_Check(v)) {
417 PyErr_Format(PyExc_TypeError,
418 "decoder did not return a string object (type=%.400s)",
419 v->ob_type->tp_name);
420 Py_DECREF(v);
421 goto onError;
422 }
423
424 return v;
425
426 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 return NULL;
428}
429
430PyObject *PyString_Encode(const char *s,
431 int size,
432 const char *encoding,
433 const char *errors)
434{
435 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000436
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 str = PyString_FromStringAndSize(s, size);
438 if (str == NULL)
439 return NULL;
440 v = PyString_AsEncodedString(str, encoding, errors);
441 Py_DECREF(str);
442 return v;
443}
444
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000445PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446 const char *encoding,
447 const char *errors)
448{
449 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000450
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 if (!PyString_Check(str)) {
452 PyErr_BadArgument();
453 goto onError;
454 }
455
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000456 if (encoding == NULL) {
457#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000458 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000459#else
460 PyErr_SetString(PyExc_ValueError, "no encoding specified");
461 goto onError;
462#endif
463 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000464
465 /* Encode via the codec registry */
466 v = PyCodec_Encode(str, encoding, errors);
467 if (v == NULL)
468 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000469
470 return v;
471
472 onError:
473 return NULL;
474}
475
476PyObject *PyString_AsEncodedString(PyObject *str,
477 const char *encoding,
478 const char *errors)
479{
480 PyObject *v;
481
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000482 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000483 if (v == NULL)
484 goto onError;
485
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000486#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 /* Convert Unicode to a string using the default encoding */
488 if (PyUnicode_Check(v)) {
489 PyObject *temp = v;
490 v = PyUnicode_AsEncodedString(v, NULL, NULL);
491 Py_DECREF(temp);
492 if (v == NULL)
493 goto onError;
494 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000495#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000496 if (!PyString_Check(v)) {
497 PyErr_Format(PyExc_TypeError,
498 "encoder did not return a string object (type=%.400s)",
499 v->ob_type->tp_name);
500 Py_DECREF(v);
501 goto onError;
502 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000503
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000504 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000505
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000506 onError:
507 return NULL;
508}
509
Guido van Rossum234f9421993-06-17 12:35:49 +0000510static void
Fred Drakeba096332000-07-09 07:04:36 +0000511string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000512{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000513 switch (PyString_CHECK_INTERNED(op)) {
514 case SSTATE_NOT_INTERNED:
515 break;
516
517 case SSTATE_INTERNED_MORTAL:
518 /* revive dead object temporarily for DelItem */
519 op->ob_refcnt = 3;
520 if (PyDict_DelItem(interned, op) != 0)
521 Py_FatalError(
522 "deletion of interned string failed");
523 break;
524
525 case SSTATE_INTERNED_IMMORTAL:
526 Py_FatalError("Immortal interned string died.");
527
528 default:
529 Py_FatalError("Inconsistent interned string state.");
530 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000531 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000532}
533
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534/* Unescape a backslash-escaped string. If unicode is non-zero,
535 the string is a u-literal. If recode_encoding is non-zero,
536 the string is UTF-8 encoded and should be re-encoded in the
537 specified encoding. */
538
539PyObject *PyString_DecodeEscape(const char *s,
540 int len,
541 const char *errors,
542 int unicode,
543 const char *recode_encoding)
544{
545 int c;
546 char *p, *buf;
547 const char *end;
548 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000549 int newlen = recode_encoding ? 4*len:len;
550 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000551 if (v == NULL)
552 return NULL;
553 p = buf = PyString_AsString(v);
554 end = s + len;
555 while (s < end) {
556 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000557 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000558#ifdef Py_USING_UNICODE
559 if (recode_encoding && (*s & 0x80)) {
560 PyObject *u, *w;
561 char *r;
562 const char* t;
563 int rn;
564 t = s;
565 /* Decode non-ASCII bytes as UTF-8. */
566 while (t < end && (*t & 0x80)) t++;
567 u = PyUnicode_DecodeUTF8(s, t - s, errors);
568 if(!u) goto failed;
569
570 /* Recode them in target encoding. */
571 w = PyUnicode_AsEncodedString(
572 u, recode_encoding, errors);
573 Py_DECREF(u);
574 if (!w) goto failed;
575
576 /* Append bytes to output buffer. */
577 r = PyString_AsString(w);
578 rn = PyString_Size(w);
579 memcpy(p, r, rn);
580 p += rn;
581 Py_DECREF(w);
582 s = t;
583 } else {
584 *p++ = *s++;
585 }
586#else
587 *p++ = *s++;
588#endif
589 continue;
590 }
591 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000592 if (s==end) {
593 PyErr_SetString(PyExc_ValueError,
594 "Trailing \\ in string");
595 goto failed;
596 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000597 switch (*s++) {
598 /* XXX This assumes ASCII! */
599 case '\n': break;
600 case '\\': *p++ = '\\'; break;
601 case '\'': *p++ = '\''; break;
602 case '\"': *p++ = '\"'; break;
603 case 'b': *p++ = '\b'; break;
604 case 'f': *p++ = '\014'; break; /* FF */
605 case 't': *p++ = '\t'; break;
606 case 'n': *p++ = '\n'; break;
607 case 'r': *p++ = '\r'; break;
608 case 'v': *p++ = '\013'; break; /* VT */
609 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
610 case '0': case '1': case '2': case '3':
611 case '4': case '5': case '6': case '7':
612 c = s[-1] - '0';
613 if ('0' <= *s && *s <= '7') {
614 c = (c<<3) + *s++ - '0';
615 if ('0' <= *s && *s <= '7')
616 c = (c<<3) + *s++ - '0';
617 }
618 *p++ = c;
619 break;
620 case 'x':
621 if (isxdigit(Py_CHARMASK(s[0]))
622 && isxdigit(Py_CHARMASK(s[1]))) {
623 unsigned int x = 0;
624 c = Py_CHARMASK(*s);
625 s++;
626 if (isdigit(c))
627 x = c - '0';
628 else if (islower(c))
629 x = 10 + c - 'a';
630 else
631 x = 10 + c - 'A';
632 x = x << 4;
633 c = Py_CHARMASK(*s);
634 s++;
635 if (isdigit(c))
636 x += c - '0';
637 else if (islower(c))
638 x += 10 + c - 'a';
639 else
640 x += 10 + c - 'A';
641 *p++ = x;
642 break;
643 }
644 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000645 PyErr_SetString(PyExc_ValueError,
646 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000647 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 }
649 if (strcmp(errors, "replace") == 0) {
650 *p++ = '?';
651 } else if (strcmp(errors, "ignore") == 0)
652 /* do nothing */;
653 else {
654 PyErr_Format(PyExc_ValueError,
655 "decoding error; "
656 "unknown error handling code: %.400s",
657 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000658 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000659 }
660#ifndef Py_USING_UNICODE
661 case 'u':
662 case 'U':
663 case 'N':
664 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000665 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 "Unicode escapes not legal "
667 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000668 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000669 }
670#endif
671 default:
672 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000673 s--;
674 goto non_esc; /* an arbitry number of unescaped
675 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 }
677 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000678 if (p-buf < newlen)
679 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000680 return v;
681 failed:
682 Py_DECREF(v);
683 return NULL;
684}
685
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000686static int
687string_getsize(register PyObject *op)
688{
689 char *s;
690 int len;
691 if (PyString_AsStringAndSize(op, &s, &len))
692 return -1;
693 return len;
694}
695
696static /*const*/ char *
697string_getbuffer(register PyObject *op)
698{
699 char *s;
700 int len;
701 if (PyString_AsStringAndSize(op, &s, &len))
702 return NULL;
703 return s;
704}
705
Guido van Rossumd7047b31995-01-02 19:07:15 +0000706int
Fred Drakeba096332000-07-09 07:04:36 +0000707PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000708{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709 if (!PyString_Check(op))
710 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000711 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712}
713
714/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000715PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000717 if (!PyString_Check(op))
718 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000719 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000720}
721
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000722int
723PyString_AsStringAndSize(register PyObject *obj,
724 register char **s,
725 register int *len)
726{
727 if (s == NULL) {
728 PyErr_BadInternalCall();
729 return -1;
730 }
731
732 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000733#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000734 if (PyUnicode_Check(obj)) {
735 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
736 if (obj == NULL)
737 return -1;
738 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000739 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000740#endif
741 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000742 PyErr_Format(PyExc_TypeError,
743 "expected string or Unicode object, "
744 "%.200s found", obj->ob_type->tp_name);
745 return -1;
746 }
747 }
748
749 *s = PyString_AS_STRING(obj);
750 if (len != NULL)
751 *len = PyString_GET_SIZE(obj);
752 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
753 PyErr_SetString(PyExc_TypeError,
754 "expected string without null bytes");
755 return -1;
756 }
757 return 0;
758}
759
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000760/* Methods */
761
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000762static int
Fred Drakeba096332000-07-09 07:04:36 +0000763string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000764{
Martin v. Löwisfed24052002-10-07 13:55:50 +0000765#ifndef PRINT_MULTIBYTE_STRING
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000766 int i;
767 char c;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000768#else
769 char *scur, *send;
770 wchar_t c;
771 int cr;
772#endif
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000774
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000775 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000776 if (! PyString_CheckExact(op)) {
777 int ret;
778 /* A str subclass may have its own __str__ method. */
779 op = (PyStringObject *) PyObject_Str((PyObject *)op);
780 if (op == NULL)
781 return -1;
782 ret = string_print(op, fp, flags);
783 Py_DECREF(op);
784 return ret;
785 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000786 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000790
Thomas Wouters7e474022000-07-16 12:04:32 +0000791 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000792 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000793 if (memchr(op->ob_sval, '\'', op->ob_size) &&
794 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 quote = '"';
796
797 fputc(quote, fp);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000798#ifndef PRINT_MULTIBYTE_STRING
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799 for (i = 0; i < op->ob_size; i++) {
800 c = op->ob_sval[i];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000801#else
802 for (scur = op->ob_sval, send = op->ob_sval + op->ob_size;
803 scur < send; scur += cr) {
804 if ((cr = mbtowc(&c, scur, send - scur)) <= 0)
805 goto non_printable;
806#endif
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000807 if (c == quote || c == '\\')
Martin v. Löwisfed24052002-10-07 13:55:50 +0000808 fputc('\\', fp), fputc(c, fp);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000809 else if (c == '\t')
Martin v. Löwisfed24052002-10-07 13:55:50 +0000810 fputs("\\t", fp);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000811 else if (c == '\n')
Martin v. Löwisfed24052002-10-07 13:55:50 +0000812 fputs("\\n", fp);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000813 else if (c == '\r')
Martin v. Löwisfed24052002-10-07 13:55:50 +0000814 fputs("\\r", fp);
815#ifndef PRINT_MULTIBYTE_STRING
816 else if (' ' <= c && c < 0x7f)
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 fputc(c, fp);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000818 else
819 fprintf(fp, "\\x%02x", c & 0xff);
820#else
821 else if (_isprint(c))
822 fwrite(scur, cr, 1, fp);
823 else {
824non_printable: cr = 1; /* unit to move cursor */
825 fprintf(fp, "\\x%02x", *scur & 0xff);
826 }
827#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000830 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831}
832
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000833PyObject *
834PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000836 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000837 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
838 PyObject *v;
839 if (newsize > INT_MAX) {
840 PyErr_SetString(PyExc_OverflowError,
841 "string is too large to make repr");
842 }
843 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000845 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000846 }
847 else {
Martin v. Löwisfed24052002-10-07 13:55:50 +0000848#ifndef PRINT_MULTIBYTE_STRING
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 register int i;
850 register char c;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000851#else
852 register char *scur, *send;
853 wchar_t c;
854 int cr;
855#endif
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000857 int quote;
858
Thomas Wouters7e474022000-07-16 12:04:32 +0000859 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000860 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000861 if (smartquotes &&
862 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000863 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000864 quote = '"';
865
Tim Peters9161c8b2001-12-03 01:55:38 +0000866 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000867 *p++ = quote;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000868#ifndef PRINT_MULTIBYTE_STRING
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000870 /* There's at least enough room for a hex escape
871 and a closing quote. */
872 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873 c = op->ob_sval[i];
Martin v. Löwisfed24052002-10-07 13:55:50 +0000874#else
875 for (scur = op->ob_sval, send = op->ob_sval + op->ob_size;
876 scur < send; scur += cr) {
877 if ((cr = mbtowc(&c, scur, send - scur)) <= 0)
878 goto non_printable;
879#endif
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000880 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000882 else if (c == '\t')
883 *p++ = '\\', *p++ = 't';
884 else if (c == '\n')
885 *p++ = '\\', *p++ = 'n';
886 else if (c == '\r')
887 *p++ = '\\', *p++ = 'r';
Martin v. Löwisfed24052002-10-07 13:55:50 +0000888#ifndef PRINT_MULTIBYTE_STRING
889 else if (' ' <= c && c < 0x7f)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890 *p++ = c;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000891 else {
892#else
893 else if (_isprint(c))
894 memcpy(p, scur, cr), p += cr;
895 else {
896non_printable: cr = 1; c = *scur;
897#endif
898 *p++ = '\\'; *p++ = 'x';
899 *p++ = hexchars[(c >> 4) & 0x0f];
900 *p++ = hexchars[c & 0x0f];
901 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000902 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000903 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000904 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000905 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000906 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000907 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000908 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000910}
911
Guido van Rossum189f1df2001-05-01 16:51:53 +0000912static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000913string_repr(PyObject *op)
914{
915 return PyString_Repr(op, 1);
916}
917
918static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000919string_str(PyObject *s)
920{
Tim Petersc9933152001-10-16 20:18:24 +0000921 assert(PyString_Check(s));
922 if (PyString_CheckExact(s)) {
923 Py_INCREF(s);
924 return s;
925 }
926 else {
927 /* Subtype -- return genuine string with the same value. */
928 PyStringObject *t = (PyStringObject *) s;
929 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
930 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000931}
932
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000933static int
Fred Drakeba096332000-07-09 07:04:36 +0000934string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000935{
936 return a->ob_size;
937}
938
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000939static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000940string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000941{
942 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000943 register PyStringObject *op;
944 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000945#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000946 if (PyUnicode_Check(bb))
947 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000948#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000949 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000950 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000951 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000952 return NULL;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000955 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000956 if ((a->ob_size == 0 || b->ob_size == 0) &&
957 PyString_CheckExact(a) && PyString_CheckExact(b)) {
958 if (a->ob_size == 0) {
959 Py_INCREF(bb);
960 return bb;
961 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962 Py_INCREF(a);
963 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 }
965 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000966 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000968 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000969 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000971 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000972 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000973 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000974 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
975 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
976 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978#undef b
979}
980
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000982string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000983{
984 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000985 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000986 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000987 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000988 if (n < 0)
989 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000990 /* watch out for overflows: the size can overflow int,
991 * and the # of bytes needed can overflow size_t
992 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000994 if (n && size / n != a->ob_size) {
995 PyErr_SetString(PyExc_OverflowError,
996 "repeated string is too long");
997 return NULL;
998 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000999 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001000 Py_INCREF(a);
1001 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001002 }
Tim Peters8f422462000-09-09 06:13:41 +00001003 nbytes = size * sizeof(char);
1004 if (nbytes / sizeof(char) != (size_t)size ||
1005 nbytes + sizeof(PyStringObject) <= nbytes) {
1006 PyErr_SetString(PyExc_OverflowError,
1007 "repeated string is too long");
1008 return NULL;
1009 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001010 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001011 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001012 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001013 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001014 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001015 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001016 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001017 for (i = 0; i < size; i += a->ob_size)
1018 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
1019 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001021}
1022
1023/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1024
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001025static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001026string_slice(register PyStringObject *a, register int i, register int j)
1027 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028{
1029 if (i < 0)
1030 i = 0;
1031 if (j < 0)
1032 j = 0; /* Avoid signed/unsigned bug in next line */
1033 if (j > a->ob_size)
1034 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001035 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1036 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001037 Py_INCREF(a);
1038 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001039 }
1040 if (j < i)
1041 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001043}
1044
Guido van Rossum9284a572000-03-07 15:53:43 +00001045static int
Fred Drakeba096332000-07-09 07:04:36 +00001046string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001047{
Barry Warsaw817918c2002-08-06 16:58:21 +00001048 const char *lhs, *rhs, *end;
1049 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001050
1051 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001052#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001053 if (PyUnicode_Check(el))
1054 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001055#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001056 if (!PyString_Check(el)) {
1057 PyErr_SetString(PyExc_TypeError,
1058 "'in <string>' requires string as left operand");
1059 return -1;
1060 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001061 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001062 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001063 rhs = PyString_AS_STRING(el);
1064 lhs = PyString_AS_STRING(a);
1065
1066 /* optimize for a single character */
1067 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001068 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001069
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001070 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001071 while (lhs <= end) {
1072 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001073 return 1;
1074 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001075
Guido van Rossum9284a572000-03-07 15:53:43 +00001076 return 0;
1077}
1078
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001079static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001080string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001081{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001082 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001083 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001084 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001085 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086 return NULL;
1087 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001088 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001089 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001090 if (v == NULL)
1091 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001092 else {
1093#ifdef COUNT_ALLOCS
1094 one_strings++;
1095#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001096 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001097 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001098 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001099}
1100
Martin v. Löwiscd353062001-05-24 16:56:35 +00001101static PyObject*
1102string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001103{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001104 int c;
1105 int len_a, len_b;
1106 int min_len;
1107 PyObject *result;
1108
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001109 /* Make sure both arguments are strings. */
1110 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111 result = Py_NotImplemented;
1112 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001113 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001114 if (a == b) {
1115 switch (op) {
1116 case Py_EQ:case Py_LE:case Py_GE:
1117 result = Py_True;
1118 goto out;
1119 case Py_NE:case Py_LT:case Py_GT:
1120 result = Py_False;
1121 goto out;
1122 }
1123 }
1124 if (op == Py_EQ) {
1125 /* Supporting Py_NE here as well does not save
1126 much time, since Py_NE is rarely used. */
1127 if (a->ob_size == b->ob_size
1128 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001129 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130 a->ob_size) == 0)) {
1131 result = Py_True;
1132 } else {
1133 result = Py_False;
1134 }
1135 goto out;
1136 }
1137 len_a = a->ob_size; len_b = b->ob_size;
1138 min_len = (len_a < len_b) ? len_a : len_b;
1139 if (min_len > 0) {
1140 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1141 if (c==0)
1142 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1143 }else
1144 c = 0;
1145 if (c == 0)
1146 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1147 switch (op) {
1148 case Py_LT: c = c < 0; break;
1149 case Py_LE: c = c <= 0; break;
1150 case Py_EQ: assert(0); break; /* unreachable */
1151 case Py_NE: c = c != 0; break;
1152 case Py_GT: c = c > 0; break;
1153 case Py_GE: c = c >= 0; break;
1154 default:
1155 result = Py_NotImplemented;
1156 goto out;
1157 }
1158 result = c ? Py_True : Py_False;
1159 out:
1160 Py_INCREF(result);
1161 return result;
1162}
1163
1164int
1165_PyString_Eq(PyObject *o1, PyObject *o2)
1166{
1167 PyStringObject *a, *b;
1168 a = (PyStringObject*)o1;
1169 b = (PyStringObject*)o2;
1170 return a->ob_size == b->ob_size
1171 && *a->ob_sval == *b->ob_sval
1172 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001173}
1174
Guido van Rossum9bfef441993-03-29 10:43:31 +00001175static long
Fred Drakeba096332000-07-09 07:04:36 +00001176string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001177{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001178 register int len;
1179 register unsigned char *p;
1180 register long x;
1181
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001182 if (a->ob_shash != -1)
1183 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 len = a->ob_size;
1185 p = (unsigned char *) a->ob_sval;
1186 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001187 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001188 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189 x ^= a->ob_size;
1190 if (x == -1)
1191 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001192 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001193 return x;
1194}
1195
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001196static PyObject*
1197string_subscript(PyStringObject* self, PyObject* item)
1198{
1199 if (PyInt_Check(item)) {
1200 long i = PyInt_AS_LONG(item);
1201 if (i < 0)
1202 i += PyString_GET_SIZE(self);
1203 return string_item(self,i);
1204 }
1205 else if (PyLong_Check(item)) {
1206 long i = PyLong_AsLong(item);
1207 if (i == -1 && PyErr_Occurred())
1208 return NULL;
1209 if (i < 0)
1210 i += PyString_GET_SIZE(self);
1211 return string_item(self,i);
1212 }
1213 else if (PySlice_Check(item)) {
1214 int start, stop, step, slicelength, cur, i;
1215 char* source_buf;
1216 char* result_buf;
1217 PyObject* result;
1218
1219 if (PySlice_GetIndicesEx((PySliceObject*)item,
1220 PyString_GET_SIZE(self),
1221 &start, &stop, &step, &slicelength) < 0) {
1222 return NULL;
1223 }
1224
1225 if (slicelength <= 0) {
1226 return PyString_FromStringAndSize("", 0);
1227 }
1228 else {
1229 source_buf = PyString_AsString((PyObject*)self);
1230 result_buf = PyMem_Malloc(slicelength);
1231
1232 for (cur = start, i = 0; i < slicelength;
1233 cur += step, i++) {
1234 result_buf[i] = source_buf[cur];
1235 }
1236
1237 result = PyString_FromStringAndSize(result_buf,
1238 slicelength);
1239 PyMem_Free(result_buf);
1240 return result;
1241 }
1242 }
1243 else {
1244 PyErr_SetString(PyExc_TypeError,
1245 "string indices must be integers");
1246 return NULL;
1247 }
1248}
1249
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001250static int
Fred Drakeba096332000-07-09 07:04:36 +00001251string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252{
1253 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001254 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001255 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001256 return -1;
1257 }
1258 *ptr = (void *)self->ob_sval;
1259 return self->ob_size;
1260}
1261
/* Write-buffer handler: always fails.  Sets TypeError and returns -1
   whatever the arguments are; *ptr is never written. */
static int
string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
{
	PyErr_SetString(PyExc_TypeError,
			"Cannot use string as modifiable buffer");
	return -1;
}
1269
1270static int
Fred Drakeba096332000-07-09 07:04:36 +00001271string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001272{
1273 if ( lenp )
1274 *lenp = self->ob_size;
1275 return 1;
1276}
1277
Guido van Rossum1db70701998-10-08 02:18:52 +00001278static int
Fred Drakeba096332000-07-09 07:04:36 +00001279string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001280{
1281 if ( index != 0 ) {
1282 PyErr_SetString(PyExc_SystemError,
1283 "accessing non-existent string segment");
1284 return -1;
1285 }
1286 *ptr = self->ob_sval;
1287 return self->ob_size;
1288}
1289
/* Sequence slot table for strings.  The two assignment slots are 0:
   no item or slice assignment handler is provided. */
static PySequenceMethods string_as_sequence = {
	(inquiry)string_length, /*sq_length*/
	(binaryfunc)string_concat, /*sq_concat*/
	(intargfunc)string_repeat, /*sq_repeat*/
	(intargfunc)string_item, /*sq_item*/
	(intintargfunc)string_slice, /*sq_slice*/
	0,		/*sq_ass_item*/
	0,		/*sq_ass_slice*/
	(objobjproc)string_contains /*sq_contains*/
};
1300
/* Mapping slot table for strings: a length handler, the subscript
   handler (int, long and slice items), and a third slot left at 0. */
static PyMappingMethods string_as_mapping = {
	(inquiry)string_length,
	(binaryfunc)string_subscript,
	0,
};
1306
/* Buffer slot table for strings: read buffer, write buffer (which
   always fails with TypeError), segment count, and character buffer. */
static PyBufferProcs string_as_buffer = {
	(getreadbufferproc)string_buffer_getreadbuf,
	(getwritebufferproc)string_buffer_getwritebuf,
	(getsegcountproc)string_buffer_getsegcount,
	(getcharbufferproc)string_buffer_getcharbuf,
};
1313
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314
1315
/* Indices into stripformat[] below, one per strip variant
   (lstrip, rstrip, strip). */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* The bare method name: the format string with its 3-character "|O:"
   prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001324
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001325
1326static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001327split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001329 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 PyObject* item;
1331 PyObject *list = PyList_New(0);
1332
1333 if (list == NULL)
1334 return NULL;
1335
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 for (i = j = 0; i < len; ) {
1337 while (i < len && isspace(Py_CHARMASK(s[i])))
1338 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001340 while (i < len && !isspace(Py_CHARMASK(s[i])))
1341 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001343 if (maxsplit-- <= 0)
1344 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1346 if (item == NULL)
1347 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348 err = PyList_Append(list, item);
1349 Py_DECREF(item);
1350 if (err < 0)
1351 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001352 while (i < len && isspace(Py_CHARMASK(s[i])))
1353 i++;
1354 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001355 }
1356 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001357 if (j < len) {
1358 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1359 if (item == NULL)
1360 goto finally;
1361 err = PyList_Append(list, item);
1362 Py_DECREF(item);
1363 if (err < 0)
1364 goto finally;
1365 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001366 return list;
1367 finally:
1368 Py_DECREF(list);
1369 return NULL;
1370}
1371
1372
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001373PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374"S.split([sep [,maxsplit]]) -> list of strings\n\
1375\n\
1376Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001377delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001378splits are done. If sep is not specified or is None, any\n\
1379whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001380
1381static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001382string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383{
1384 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001385 int maxsplit = -1;
1386 const char *s = PyString_AS_STRING(self), *sub;
1387 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001388
Guido van Rossum4c08d552000-03-10 22:55:18 +00001389 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001390 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001391 if (maxsplit < 0)
1392 maxsplit = INT_MAX;
1393 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001394 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001395 if (PyString_Check(subobj)) {
1396 sub = PyString_AS_STRING(subobj);
1397 n = PyString_GET_SIZE(subobj);
1398 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001399#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400 else if (PyUnicode_Check(subobj))
1401 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001402#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001403 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1404 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405 if (n == 0) {
1406 PyErr_SetString(PyExc_ValueError, "empty separator");
1407 return NULL;
1408 }
1409
1410 list = PyList_New(0);
1411 if (list == NULL)
1412 return NULL;
1413
1414 i = j = 0;
1415 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001416 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001417 if (maxsplit-- <= 0)
1418 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1420 if (item == NULL)
1421 goto fail;
1422 err = PyList_Append(list, item);
1423 Py_DECREF(item);
1424 if (err < 0)
1425 goto fail;
1426 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001427 }
1428 else
1429 i++;
1430 }
1431 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1432 if (item == NULL)
1433 goto fail;
1434 err = PyList_Append(list, item);
1435 Py_DECREF(item);
1436 if (err < 0)
1437 goto fail;
1438
1439 return list;
1440
1441 fail:
1442 Py_DECREF(list);
1443 return NULL;
1444}
1445
1446
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001447PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448"S.join(sequence) -> string\n\
1449\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001450Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001451sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452
1453static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001454string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455{
1456 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001457 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459 char *p;
1460 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001461 size_t sz = 0;
1462 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001463 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001464
Tim Peters19fe14e2001-01-19 03:03:47 +00001465 seq = PySequence_Fast(orig, "");
1466 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001467 if (PyErr_ExceptionMatches(PyExc_TypeError))
1468 PyErr_Format(PyExc_TypeError,
1469 "sequence expected, %.80s found",
1470 orig->ob_type->tp_name);
1471 return NULL;
1472 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001473
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001474 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001475 if (seqlen == 0) {
1476 Py_DECREF(seq);
1477 return PyString_FromString("");
1478 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001480 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001481 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1482 PyErr_Format(PyExc_TypeError,
1483 "sequence item 0: expected string,"
1484 " %.80s found",
1485 item->ob_type->tp_name);
1486 Py_DECREF(seq);
1487 return NULL;
1488 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001489 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001490 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001491 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001493
Tim Peters19fe14e2001-01-19 03:03:47 +00001494 /* There are at least two things to join. Do a pre-pass to figure out
1495 * the total amount of space we'll need (sz), see whether any argument
1496 * is absurd, and defer to the Unicode join if appropriate.
1497 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001498 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001499 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001500 item = PySequence_Fast_GET_ITEM(seq, i);
1501 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001502#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001503 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001504 /* Defer to Unicode join.
1505 * CAUTION: There's no gurantee that the
1506 * original sequence can be iterated over
1507 * again, so we must pass seq here.
1508 */
1509 PyObject *result;
1510 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001511 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001512 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001513 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001514#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001515 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001516 "sequence item %i: expected string,"
1517 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001518 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001519 Py_DECREF(seq);
1520 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001521 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001522 sz += PyString_GET_SIZE(item);
1523 if (i != 0)
1524 sz += seplen;
1525 if (sz < old_sz || sz > INT_MAX) {
1526 PyErr_SetString(PyExc_OverflowError,
1527 "join() is too long for a Python string");
1528 Py_DECREF(seq);
1529 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001531 }
1532
1533 /* Allocate result space. */
1534 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1535 if (res == NULL) {
1536 Py_DECREF(seq);
1537 return NULL;
1538 }
1539
1540 /* Catenate everything. */
1541 p = PyString_AS_STRING(res);
1542 for (i = 0; i < seqlen; ++i) {
1543 size_t n;
1544 item = PySequence_Fast_GET_ITEM(seq, i);
1545 n = PyString_GET_SIZE(item);
1546 memcpy(p, PyString_AS_STRING(item), n);
1547 p += n;
1548 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001549 memcpy(p, sep, seplen);
1550 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001551 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001553
Jeremy Hylton49048292000-07-11 03:28:17 +00001554 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001556}
1557
Tim Peters52e155e2001-06-16 05:42:57 +00001558PyObject *
1559_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001560{
Tim Petersa7259592001-06-16 05:11:17 +00001561 assert(sep != NULL && PyString_Check(sep));
1562 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001563 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001564}
1565
/* Normalize a (start, end) pair in place against a string of length len.

   end:   values above len are clipped to len; negative values have len
          added once and, if still negative, become 0.
   start: negative values have len added once and, if still negative,
          become 0.  start is NOT clipped against len, so callers must
          cope with start > end. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int hi, lo;

	/* Finish with *end before touching *start, as the original did. */
	hi = *end;
	if (hi > len)
		hi = len;
	else if (hi < 0)
		hi += len;
	if (hi < 0)
		hi = 0;
	*end = hi;

	lo = *start;
	if (lo < 0)
		lo += len;
	if (lo < 0)
		lo = 0;
	*start = lo;
}
1580
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001581static long
Fred Drakeba096332000-07-09 07:04:36 +00001582string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 int len = PyString_GET_SIZE(self);
1586 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001587 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001588
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001589 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001590 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001591 return -2;
1592 if (PyString_Check(subobj)) {
1593 sub = PyString_AS_STRING(subobj);
1594 n = PyString_GET_SIZE(subobj);
1595 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001596#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001597 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001598 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001599#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001600 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601 return -2;
1602
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001603 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604
Guido van Rossum4c08d552000-03-10 22:55:18 +00001605 if (dir > 0) {
1606 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001608 last -= n;
1609 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001610 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001611 return (long)i;
1612 }
1613 else {
1614 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001615
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 if (n == 0 && i <= last)
1617 return (long)last;
1618 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001619 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001620 return (long)j;
1621 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001622
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 return -1;
1624}
1625
1626
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001627PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628"S.find(sub [,start [,end]]) -> int\n\
1629\n\
1630Return the lowest index in S where substring sub is found,\n\
1631such that sub is contained within s[start,end]. Optional\n\
1632arguments start and end are interpreted as in slice notation.\n\
1633\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001634Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635
1636static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001637string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001639 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640 if (result == -2)
1641 return NULL;
1642 return PyInt_FromLong(result);
1643}
1644
1645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001646PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647"S.index(sub [,start [,end]]) -> int\n\
1648\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001649Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650
1651static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001652string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001654 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655 if (result == -2)
1656 return NULL;
1657 if (result == -1) {
1658 PyErr_SetString(PyExc_ValueError,
1659 "substring not found in string.index");
1660 return NULL;
1661 }
1662 return PyInt_FromLong(result);
1663}
1664
1665
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001666PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001667"S.rfind(sub [,start [,end]]) -> int\n\
1668\n\
1669Return the highest index in S where substring sub is found,\n\
1670such that sub is contained within s[start,end]. Optional\n\
1671arguments start and end are interpreted as in slice notation.\n\
1672\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001673Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674
1675static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001676string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001677{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001678 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001679 if (result == -2)
1680 return NULL;
1681 return PyInt_FromLong(result);
1682}
1683
1684
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001685PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001686"S.rindex(sub [,start [,end]]) -> int\n\
1687\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001688Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689
1690static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001691string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694 if (result == -2)
1695 return NULL;
1696 if (result == -1) {
1697 PyErr_SetString(PyExc_ValueError,
1698 "substring not found in string.rindex");
1699 return NULL;
1700 }
1701 return PyInt_FromLong(result);
1702}
1703
1704
1705static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001706do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1707{
1708 char *s = PyString_AS_STRING(self);
1709 int len = PyString_GET_SIZE(self);
1710 char *sep = PyString_AS_STRING(sepobj);
1711 int seplen = PyString_GET_SIZE(sepobj);
1712 int i, j;
1713
1714 i = 0;
1715 if (striptype != RIGHTSTRIP) {
1716 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1717 i++;
1718 }
1719 }
1720
1721 j = len;
1722 if (striptype != LEFTSTRIP) {
1723 do {
1724 j--;
1725 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1726 j++;
1727 }
1728
1729 if (i == 0 && j == len && PyString_CheckExact(self)) {
1730 Py_INCREF(self);
1731 return (PyObject*)self;
1732 }
1733 else
1734 return PyString_FromStringAndSize(s+i, j-i);
1735}
1736
1737
1738static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001739do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740{
1741 char *s = PyString_AS_STRING(self);
1742 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744 i = 0;
1745 if (striptype != RIGHTSTRIP) {
1746 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1747 i++;
1748 }
1749 }
1750
1751 j = len;
1752 if (striptype != LEFTSTRIP) {
1753 do {
1754 j--;
1755 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1756 j++;
1757 }
1758
Tim Peters8fa5dd02001-09-12 02:18:30 +00001759 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760 Py_INCREF(self);
1761 return (PyObject*)self;
1762 }
1763 else
1764 return PyString_FromStringAndSize(s+i, j-i);
1765}
1766
1767
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001768static PyObject *
1769do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1770{
1771 PyObject *sep = NULL;
1772
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001773 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001774 return NULL;
1775
1776 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001777 if (PyString_Check(sep))
1778 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001779#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001780 else if (PyUnicode_Check(sep)) {
1781 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1782 PyObject *res;
1783 if (uniself==NULL)
1784 return NULL;
1785 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1786 striptype, sep);
1787 Py_DECREF(uniself);
1788 return res;
1789 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001790#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001791 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001792 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001793#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001794 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001795#else
1796 "%s arg must be None or str",
1797#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001798 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001799 return NULL;
1800 }
1801 return do_xstrip(self, striptype, sep);
1802 }
1803
1804 return do_strip(self, striptype);
1805}
1806
1807
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001808PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001809"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810\n\
1811Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001812whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001813If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001814If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815
1816static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001817string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001819 if (PyTuple_GET_SIZE(args) == 0)
1820 return do_strip(self, BOTHSTRIP); /* Common case */
1821 else
1822 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823}
1824
1825
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001826PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001827"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001829Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001830If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001831If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832
1833static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001834string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001836 if (PyTuple_GET_SIZE(args) == 0)
1837 return do_strip(self, LEFTSTRIP); /* Common case */
1838 else
1839 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840}
1841
1842
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001843PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001844"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001846Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001847If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001848If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849
1850static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001851string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001853 if (PyTuple_GET_SIZE(args) == 0)
1854 return do_strip(self, RIGHTSTRIP); /* Common case */
1855 else
1856 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857}
1858
1859
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001860PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861"S.lower() -> string\n\
1862\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001863Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864
1865static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001866string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867{
1868 char *s = PyString_AS_STRING(self), *s_new;
1869 int i, n = PyString_GET_SIZE(self);
1870 PyObject *new;
1871
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001872 new = PyString_FromStringAndSize(NULL, n);
1873 if (new == NULL)
1874 return NULL;
1875 s_new = PyString_AsString(new);
1876 for (i = 0; i < n; i++) {
1877 int c = Py_CHARMASK(*s++);
1878 if (isupper(c)) {
1879 *s_new = tolower(c);
1880 } else
1881 *s_new = c;
1882 s_new++;
1883 }
1884 return new;
1885}
1886
1887
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001888PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889"S.upper() -> string\n\
1890\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001891Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892
1893static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001894string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895{
1896 char *s = PyString_AS_STRING(self), *s_new;
1897 int i, n = PyString_GET_SIZE(self);
1898 PyObject *new;
1899
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900 new = PyString_FromStringAndSize(NULL, n);
1901 if (new == NULL)
1902 return NULL;
1903 s_new = PyString_AsString(new);
1904 for (i = 0; i < n; i++) {
1905 int c = Py_CHARMASK(*s++);
1906 if (islower(c)) {
1907 *s_new = toupper(c);
1908 } else
1909 *s_new = c;
1910 s_new++;
1911 }
1912 return new;
1913}
1914
1915
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001916PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001917"S.title() -> string\n\
1918\n\
1919Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001920characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001921
1922static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001923string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001924{
1925 char *s = PyString_AS_STRING(self), *s_new;
1926 int i, n = PyString_GET_SIZE(self);
1927 int previous_is_cased = 0;
1928 PyObject *new;
1929
Guido van Rossum4c08d552000-03-10 22:55:18 +00001930 new = PyString_FromStringAndSize(NULL, n);
1931 if (new == NULL)
1932 return NULL;
1933 s_new = PyString_AsString(new);
1934 for (i = 0; i < n; i++) {
1935 int c = Py_CHARMASK(*s++);
1936 if (islower(c)) {
1937 if (!previous_is_cased)
1938 c = toupper(c);
1939 previous_is_cased = 1;
1940 } else if (isupper(c)) {
1941 if (previous_is_cased)
1942 c = tolower(c);
1943 previous_is_cased = 1;
1944 } else
1945 previous_is_cased = 0;
1946 *s_new++ = c;
1947 }
1948 return new;
1949}
1950
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001951PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952"S.capitalize() -> string\n\
1953\n\
1954Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001955capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956
1957static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001958string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959{
1960 char *s = PyString_AS_STRING(self), *s_new;
1961 int i, n = PyString_GET_SIZE(self);
1962 PyObject *new;
1963
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964 new = PyString_FromStringAndSize(NULL, n);
1965 if (new == NULL)
1966 return NULL;
1967 s_new = PyString_AsString(new);
1968 if (0 < n) {
1969 int c = Py_CHARMASK(*s++);
1970 if (islower(c))
1971 *s_new = toupper(c);
1972 else
1973 *s_new = c;
1974 s_new++;
1975 }
1976 for (i = 1; i < n; i++) {
1977 int c = Py_CHARMASK(*s++);
1978 if (isupper(c))
1979 *s_new = tolower(c);
1980 else
1981 *s_new = c;
1982 s_new++;
1983 }
1984 return new;
1985}
1986
1987
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001988PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989"S.count(sub[, start[, end]]) -> int\n\
1990\n\
1991Return the number of occurrences of substring sub in string\n\
1992S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001993interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994
1995static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001996string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001998 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999 int len = PyString_GET_SIZE(self), n;
2000 int i = 0, last = INT_MAX;
2001 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002002 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003
Guido van Rossumc6821402000-05-08 14:08:05 +00002004 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2005 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002007
Guido van Rossum4c08d552000-03-10 22:55:18 +00002008 if (PyString_Check(subobj)) {
2009 sub = PyString_AS_STRING(subobj);
2010 n = PyString_GET_SIZE(subobj);
2011 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002012#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002013 else if (PyUnicode_Check(subobj)) {
2014 int count;
2015 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2016 if (count == -1)
2017 return NULL;
2018 else
2019 return PyInt_FromLong((long) count);
2020 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002021#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002022 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2023 return NULL;
2024
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002025 string_adjust_indices(&i, &last, len);
2026
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027 m = last + 1 - n;
2028 if (n == 0)
2029 return PyInt_FromLong((long) (m-i));
2030
2031 r = 0;
2032 while (i < m) {
2033 if (!memcmp(s+i, sub, n)) {
2034 r++;
2035 i += n;
2036 } else {
2037 i++;
2038 }
2039 }
2040 return PyInt_FromLong((long) r);
2041}
2042
2043
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002044PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045"S.swapcase() -> string\n\
2046\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002047Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002048converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049
2050static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002051string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052{
2053 char *s = PyString_AS_STRING(self), *s_new;
2054 int i, n = PyString_GET_SIZE(self);
2055 PyObject *new;
2056
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057 new = PyString_FromStringAndSize(NULL, n);
2058 if (new == NULL)
2059 return NULL;
2060 s_new = PyString_AsString(new);
2061 for (i = 0; i < n; i++) {
2062 int c = Py_CHARMASK(*s++);
2063 if (islower(c)) {
2064 *s_new = toupper(c);
2065 }
2066 else if (isupper(c)) {
2067 *s_new = tolower(c);
2068 }
2069 else
2070 *s_new = c;
2071 s_new++;
2072 }
2073 return new;
2074}
2075
2076
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002077PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078"S.translate(table [,deletechars]) -> string\n\
2079\n\
2080Return a copy of the string S, where all characters occurring\n\
2081in the optional argument deletechars are removed, and the\n\
2082remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002083translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084
2085static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002086string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088 register char *input, *output;
2089 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090 register int i, c, changed = 0;
2091 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093 int inlen, tablen, dellen = 0;
2094 PyObject *result;
2095 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 if (!PyArg_ParseTuple(args, "O|O:translate",
2099 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101
2102 if (PyString_Check(tableobj)) {
2103 table1 = PyString_AS_STRING(tableobj);
2104 tablen = PyString_GET_SIZE(tableobj);
2105 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002106#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002108 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 parameter; instead a mapping to None will cause characters
2110 to be deleted. */
2111 if (delobj != NULL) {
2112 PyErr_SetString(PyExc_TypeError,
2113 "deletions are implemented differently for unicode");
2114 return NULL;
2115 }
2116 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2117 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002118#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002119 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002121
2122 if (delobj != NULL) {
2123 if (PyString_Check(delobj)) {
2124 del_table = PyString_AS_STRING(delobj);
2125 dellen = PyString_GET_SIZE(delobj);
2126 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002127#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002128 else if (PyUnicode_Check(delobj)) {
2129 PyErr_SetString(PyExc_TypeError,
2130 "deletions are implemented differently for unicode");
2131 return NULL;
2132 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002133#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2135 return NULL;
2136
2137 if (tablen != 256) {
2138 PyErr_SetString(PyExc_ValueError,
2139 "translation table must be 256 characters long");
2140 return NULL;
2141 }
2142 }
2143 else {
2144 del_table = NULL;
2145 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146 }
2147
2148 table = table1;
2149 inlen = PyString_Size(input_obj);
2150 result = PyString_FromStringAndSize((char *)NULL, inlen);
2151 if (result == NULL)
2152 return NULL;
2153 output_start = output = PyString_AsString(result);
2154 input = PyString_AsString(input_obj);
2155
2156 if (dellen == 0) {
2157 /* If no deletions are required, use faster code */
2158 for (i = inlen; --i >= 0; ) {
2159 c = Py_CHARMASK(*input++);
2160 if (Py_CHARMASK((*output++ = table[c])) != c)
2161 changed = 1;
2162 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002163 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164 return result;
2165 Py_DECREF(result);
2166 Py_INCREF(input_obj);
2167 return input_obj;
2168 }
2169
2170 for (i = 0; i < 256; i++)
2171 trans_table[i] = Py_CHARMASK(table[i]);
2172
2173 for (i = 0; i < dellen; i++)
2174 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2175
2176 for (i = inlen; --i >= 0; ) {
2177 c = Py_CHARMASK(*input++);
2178 if (trans_table[c] != -1)
2179 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2180 continue;
2181 changed = 1;
2182 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002183 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 Py_DECREF(result);
2185 Py_INCREF(input_obj);
2186 return input_obj;
2187 }
2188 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002189 if (inlen > 0)
2190 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191 return result;
2192}
2193
2194
2195/* What follows is used for implementing replace(). Perry Stoll. */
2196
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  if there is none.  If PAT_LEN is greater than LEN, the function
  returns -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot begin in the last pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		++pos;
	}
	return -1;
}
2222
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  counted left to right: mem=1111 and pat=11 gives 2, and mem=11111
  and pat=11 also gives 2.

  The scan resumes right after each match, so PAT_LEN is expected to
  be positive (mymemreplace only calls this when pat_len > 0).
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	for (; len >= 0; ++hits) {
		const int at = mymemfind(mem, len, pat, pat_len);

		if (at == -1)
			break;
		/* Continue searching just past this match. */
		mem += at + pat_len;
		len -= at + pat_len;
	}
	return hits;
}
2246
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means no limit.  An empty PAT matches before every
   byte of STR and once more at its end.

   If STR is empty, if PAT and SUB are both empty, if length of PAT is
   greater than length of STR, or if there is nothing to replace, then
   the original string is returned.  Otherwise, a new string is
   allocated here with PyMem_MALLOC and returned; the caller owns it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* The result only grows when SUB is longer than PAT.  Refuse the
	   request if len + nfound*(sub_len - pat_len) would overflow an
	   int: an overflowed length would under-allocate the buffer that
	   the copies below fill. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* Empty pattern: emit SUB before each input byte
			   until the replacement budget is used up, then
			   copy the rest of the input unchanged. */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2344
2345
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002346PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347"S.replace (old, new[, maxsplit]) -> string\n\
2348\n\
2349Return a copy of string S with all occurrences of substring\n\
2350old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002351given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352
2353static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002354string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 const char *str = PyString_AS_STRING(self), *sub, *repl;
2357 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002358 const int len = PyString_GET_SIZE(self);
2359 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002360 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002362 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363
Guido van Rossum4c08d552000-03-10 22:55:18 +00002364 if (!PyArg_ParseTuple(args, "OO|i:replace",
2365 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367
2368 if (PyString_Check(subobj)) {
2369 sub = PyString_AS_STRING(subobj);
2370 sub_len = PyString_GET_SIZE(subobj);
2371 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002372#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002374 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002375 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002376#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002377 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2378 return NULL;
2379
2380 if (PyString_Check(replobj)) {
2381 repl = PyString_AS_STRING(replobj);
2382 repl_len = PyString_GET_SIZE(replobj);
2383 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002384#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002385 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002386 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002388#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002389 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2390 return NULL;
2391
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002393 if (new_s == NULL) {
2394 PyErr_NoMemory();
2395 return NULL;
2396 }
2397 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002398 if (PyString_CheckExact(self)) {
2399 /* we're returning another reference to self */
2400 new = (PyObject*)self;
2401 Py_INCREF(new);
2402 }
2403 else {
2404 new = PyString_FromStringAndSize(str, len);
2405 if (new == NULL)
2406 return NULL;
2407 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408 }
2409 else {
2410 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002411 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412 }
2413 return new;
2414}
2415
2416
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002417PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002418"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002420Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002422comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423
2424static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002425string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430 int plen;
2431 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002432 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434
Guido van Rossumc6821402000-05-08 14:08:05 +00002435 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2436 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002437 return NULL;
2438 if (PyString_Check(subobj)) {
2439 prefix = PyString_AS_STRING(subobj);
2440 plen = PyString_GET_SIZE(subobj);
2441 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002442#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002443 else if (PyUnicode_Check(subobj)) {
2444 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002445 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002446 subobj, start, end, -1);
2447 if (rc == -1)
2448 return NULL;
2449 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002450 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002451 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002452#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002453 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002454 return NULL;
2455
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002456 string_adjust_indices(&start, &end, len);
2457
2458 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002459 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002460
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002461 if (end-start >= plen)
2462 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2463 else
2464 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002465}
2466
2467
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002468PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002469"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002470\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002471Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002473comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002474
2475static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002476string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002477{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002478 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002479 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002480 const char* suffix;
2481 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002483 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002484 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002485
Guido van Rossumc6821402000-05-08 14:08:05 +00002486 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2487 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002488 return NULL;
2489 if (PyString_Check(subobj)) {
2490 suffix = PyString_AS_STRING(subobj);
2491 slen = PyString_GET_SIZE(subobj);
2492 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002493#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002494 else if (PyUnicode_Check(subobj)) {
2495 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002496 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002497 subobj, start, end, +1);
2498 if (rc == -1)
2499 return NULL;
2500 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002501 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002502 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002503#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002505 return NULL;
2506
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002507 string_adjust_indices(&start, &end, len);
2508
2509 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002510 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002511
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002512 if (end-slen > start)
2513 start = end - slen;
2514 if (end-start >= slen)
2515 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2516 else
2517 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002518}
2519
2520
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002521PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002522"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002523\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002524Encodes S using the codec registered for encoding. encoding defaults\n\
2525to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002526handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002527a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2528'xmlcharrefreplace' as well as any other name registered with\n\
2529codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002530
2531static PyObject *
2532string_encode(PyStringObject *self, PyObject *args)
2533{
2534 char *encoding = NULL;
2535 char *errors = NULL;
2536 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2537 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002538 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2539}
2540
2541
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002542PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002543"S.decode([encoding[,errors]]) -> object\n\
2544\n\
2545Decodes S using the codec registered for encoding. encoding defaults\n\
2546to the default encoding. errors may be given to set a different error\n\
2547handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002548a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2549as well as any other name registerd with codecs.register_error that is\n\
2550able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002551
2552static PyObject *
2553string_decode(PyStringObject *self, PyObject *args)
2554{
2555 char *encoding = NULL;
2556 char *errors = NULL;
2557 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2558 return NULL;
2559 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002560}
2561
2562
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002563PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002564"S.expandtabs([tabsize]) -> string\n\
2565\n\
2566Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002567If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002568
2569static PyObject*
2570string_expandtabs(PyStringObject *self, PyObject *args)
2571{
2572 const char *e, *p;
2573 char *q;
2574 int i, j;
2575 PyObject *u;
2576 int tabsize = 8;
2577
2578 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2579 return NULL;
2580
Thomas Wouters7e474022000-07-16 12:04:32 +00002581 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582 i = j = 0;
2583 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2584 for (p = PyString_AS_STRING(self); p < e; p++)
2585 if (*p == '\t') {
2586 if (tabsize > 0)
2587 j += tabsize - (j % tabsize);
2588 }
2589 else {
2590 j++;
2591 if (*p == '\n' || *p == '\r') {
2592 i += j;
2593 j = 0;
2594 }
2595 }
2596
2597 /* Second pass: create output string and fill it */
2598 u = PyString_FromStringAndSize(NULL, i + j);
2599 if (!u)
2600 return NULL;
2601
2602 j = 0;
2603 q = PyString_AS_STRING(u);
2604
2605 for (p = PyString_AS_STRING(self); p < e; p++)
2606 if (*p == '\t') {
2607 if (tabsize > 0) {
2608 i = tabsize - (j % tabsize);
2609 j += i;
2610 while (i--)
2611 *q++ = ' ';
2612 }
2613 }
2614 else {
2615 j++;
2616 *q++ = *p;
2617 if (*p == '\n' || *p == '\r')
2618 j = 0;
2619 }
2620
2621 return u;
2622}
2623
Tim Peters8fa5dd02001-09-12 02:18:30 +00002624static PyObject *
2625pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002626{
2627 PyObject *u;
2628
2629 if (left < 0)
2630 left = 0;
2631 if (right < 0)
2632 right = 0;
2633
Tim Peters8fa5dd02001-09-12 02:18:30 +00002634 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 Py_INCREF(self);
2636 return (PyObject *)self;
2637 }
2638
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002639 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002640 left + PyString_GET_SIZE(self) + right);
2641 if (u) {
2642 if (left)
2643 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002644 memcpy(PyString_AS_STRING(u) + left,
2645 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646 PyString_GET_SIZE(self));
2647 if (right)
2648 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2649 fill, right);
2650 }
2651
2652 return u;
2653}
2654
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002655PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002656"S.ljust(width) -> string\n"
2657"\n"
2658"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002659"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002660
2661static PyObject *
2662string_ljust(PyStringObject *self, PyObject *args)
2663{
2664 int width;
2665 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2666 return NULL;
2667
Tim Peters8fa5dd02001-09-12 02:18:30 +00002668 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669 Py_INCREF(self);
2670 return (PyObject*) self;
2671 }
2672
2673 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2674}
2675
2676
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002677PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002678"S.rjust(width) -> string\n"
2679"\n"
2680"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002681"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002682
2683static PyObject *
2684string_rjust(PyStringObject *self, PyObject *args)
2685{
2686 int width;
2687 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2688 return NULL;
2689
Tim Peters8fa5dd02001-09-12 02:18:30 +00002690 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002691 Py_INCREF(self);
2692 return (PyObject*) self;
2693 }
2694
2695 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2696}
2697
2698
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002699PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002700"S.center(width) -> string\n"
2701"\n"
2702"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002703"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002704
2705static PyObject *
2706string_center(PyStringObject *self, PyObject *args)
2707{
2708 int marg, left;
2709 int width;
2710
2711 if (!PyArg_ParseTuple(args, "i:center", &width))
2712 return NULL;
2713
Tim Peters8fa5dd02001-09-12 02:18:30 +00002714 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002715 Py_INCREF(self);
2716 return (PyObject*) self;
2717 }
2718
2719 marg = width - PyString_GET_SIZE(self);
2720 left = marg / 2 + (marg & width & 1);
2721
2722 return pad(self, left, marg - left, ' ');
2723}
2724
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002725PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002726"S.zfill(width) -> string\n"
2727"\n"
2728"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002729"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002730
2731static PyObject *
2732string_zfill(PyStringObject *self, PyObject *args)
2733{
2734 int fill;
2735 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002736 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002737
2738 int width;
2739 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2740 return NULL;
2741
2742 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002743 if (PyString_CheckExact(self)) {
2744 Py_INCREF(self);
2745 return (PyObject*) self;
2746 }
2747 else
2748 return PyString_FromStringAndSize(
2749 PyString_AS_STRING(self),
2750 PyString_GET_SIZE(self)
2751 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002752 }
2753
2754 fill = width - PyString_GET_SIZE(self);
2755
2756 s = pad(self, fill, 0, '0');
2757
2758 if (s == NULL)
2759 return NULL;
2760
2761 p = PyString_AS_STRING(s);
2762 if (p[fill] == '+' || p[fill] == '-') {
2763 /* move sign to beginning of string */
2764 p[0] = p[fill];
2765 p[fill] = '0';
2766 }
2767
2768 return (PyObject*) s;
2769}
2770
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002771PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002772"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002773"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002774"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002775"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002776
2777static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002778string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002779{
Fred Drakeba096332000-07-09 07:04:36 +00002780 register const unsigned char *p
2781 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002782 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002783
Guido van Rossum4c08d552000-03-10 22:55:18 +00002784 /* Shortcut for single character strings */
2785 if (PyString_GET_SIZE(self) == 1 &&
2786 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002787 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002788
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002789 /* Special case for empty strings */
2790 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002791 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002792
Guido van Rossum4c08d552000-03-10 22:55:18 +00002793 e = p + PyString_GET_SIZE(self);
2794 for (; p < e; p++) {
2795 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002796 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002797 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002798 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002799}
2800
2801
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002802PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002803"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002804\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002805Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002806and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002807
2808static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002809string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002810{
Fred Drakeba096332000-07-09 07:04:36 +00002811 register const unsigned char *p
2812 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002813 register const unsigned char *e;
2814
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002815 /* Shortcut for single character strings */
2816 if (PyString_GET_SIZE(self) == 1 &&
2817 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002818 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002819
2820 /* Special case for empty strings */
2821 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002822 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002823
2824 e = p + PyString_GET_SIZE(self);
2825 for (; p < e; p++) {
2826 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002827 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002828 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002829 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002830}
2831
2832
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002833PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002834"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002835\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002836Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002837and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002838
2839static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002840string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002841{
Fred Drakeba096332000-07-09 07:04:36 +00002842 register const unsigned char *p
2843 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002844 register const unsigned char *e;
2845
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002846 /* Shortcut for single character strings */
2847 if (PyString_GET_SIZE(self) == 1 &&
2848 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002849 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002850
2851 /* Special case for empty strings */
2852 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002853 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002854
2855 e = p + PyString_GET_SIZE(self);
2856 for (; p < e; p++) {
2857 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002858 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002859 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002860 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002861}
2862
2863
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002864PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002865"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002866\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002867Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002868False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869
2870static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002871string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002872{
Fred Drakeba096332000-07-09 07:04:36 +00002873 register const unsigned char *p
2874 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002875 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877 /* Shortcut for single character strings */
2878 if (PyString_GET_SIZE(self) == 1 &&
2879 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002880 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002882 /* Special case for empty strings */
2883 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002884 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002885
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886 e = p + PyString_GET_SIZE(self);
2887 for (; p < e; p++) {
2888 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002889 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002890 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002891 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002892}
2893
2894
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002895PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002896"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002897\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002898Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002899at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900
2901static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002902string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002903{
Fred Drakeba096332000-07-09 07:04:36 +00002904 register const unsigned char *p
2905 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002906 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002907 int cased;
2908
Guido van Rossum4c08d552000-03-10 22:55:18 +00002909 /* Shortcut for single character strings */
2910 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002911 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002912
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002913 /* Special case for empty strings */
2914 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002915 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002916
Guido van Rossum4c08d552000-03-10 22:55:18 +00002917 e = p + PyString_GET_SIZE(self);
2918 cased = 0;
2919 for (; p < e; p++) {
2920 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002921 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002922 else if (!cased && islower(*p))
2923 cased = 1;
2924 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002925 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002926}
2927
2928
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002929PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002930"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002931\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002932Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002933at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002934
2935static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002936string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002937{
Fred Drakeba096332000-07-09 07:04:36 +00002938 register const unsigned char *p
2939 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002940 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002941 int cased;
2942
Guido van Rossum4c08d552000-03-10 22:55:18 +00002943 /* Shortcut for single character strings */
2944 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002945 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002946
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002947 /* Special case for empty strings */
2948 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002949 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002950
Guido van Rossum4c08d552000-03-10 22:55:18 +00002951 e = p + PyString_GET_SIZE(self);
2952 cased = 0;
2953 for (; p < e; p++) {
2954 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002955 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002956 else if (!cased && isupper(*p))
2957 cased = 1;
2958 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002959 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002960}
2961
2962
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002963PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002964"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002965\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002966Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002967may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002968ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002969
2970static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002971string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002972{
Fred Drakeba096332000-07-09 07:04:36 +00002973 register const unsigned char *p
2974 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002975 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002976 int cased, previous_is_cased;
2977
Guido van Rossum4c08d552000-03-10 22:55:18 +00002978 /* Shortcut for single character strings */
2979 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002980 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002981
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002982 /* Special case for empty strings */
2983 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002984 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002985
Guido van Rossum4c08d552000-03-10 22:55:18 +00002986 e = p + PyString_GET_SIZE(self);
2987 cased = 0;
2988 previous_is_cased = 0;
2989 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002990 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002991
2992 if (isupper(ch)) {
2993 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002994 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002995 previous_is_cased = 1;
2996 cased = 1;
2997 }
2998 else if (islower(ch)) {
2999 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003000 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003001 previous_is_cased = 1;
3002 cased = 1;
3003 }
3004 else
3005 previous_is_cased = 0;
3006 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003007 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003008}
3009
3010
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003011PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003012"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003013\n\
3014Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003015Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003016is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003017
3018#define SPLIT_APPEND(data, left, right) \
3019 str = PyString_FromStringAndSize(data + left, right - left); \
3020 if (!str) \
3021 goto onError; \
3022 if (PyList_Append(list, str)) { \
3023 Py_DECREF(str); \
3024 goto onError; \
3025 } \
3026 else \
3027 Py_DECREF(str);
3028
3029static PyObject*
3030string_splitlines(PyStringObject *self, PyObject *args)
3031{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003032 register int i;
3033 register int j;
3034 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003035 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003036 PyObject *list;
3037 PyObject *str;
3038 char *data;
3039
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003040 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003041 return NULL;
3042
3043 data = PyString_AS_STRING(self);
3044 len = PyString_GET_SIZE(self);
3045
Guido van Rossum4c08d552000-03-10 22:55:18 +00003046 list = PyList_New(0);
3047 if (!list)
3048 goto onError;
3049
3050 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003051 int eol;
3052
Guido van Rossum4c08d552000-03-10 22:55:18 +00003053 /* Find a line and append it */
3054 while (i < len && data[i] != '\n' && data[i] != '\r')
3055 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003056
3057 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003058 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059 if (i < len) {
3060 if (data[i] == '\r' && i + 1 < len &&
3061 data[i+1] == '\n')
3062 i += 2;
3063 else
3064 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003065 if (keepends)
3066 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003068 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069 j = i;
3070 }
3071 if (j < len) {
3072 SPLIT_APPEND(data, j, len);
3073 }
3074
3075 return list;
3076
3077 onError:
3078 Py_DECREF(list);
3079 return NULL;
3080}
3081
3082#undef SPLIT_APPEND
3083
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003084
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003085static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003086string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003087 /* Counterparts of the obsolete stropmodule functions; except
3088 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003089 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3090 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3091 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3092 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003093 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3094 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3095 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3096 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3097 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3098 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3099 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003100 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3101 capitalize__doc__},
3102 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3103 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3104 endswith__doc__},
3105 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3106 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3107 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3108 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3109 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3110 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3111 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3112 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3113 startswith__doc__},
3114 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3115 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3116 swapcase__doc__},
3117 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3118 translate__doc__},
3119 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3120 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3121 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3122 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3123 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3124 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3125 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3126 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3127 expandtabs__doc__},
3128 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3129 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003130 {NULL, NULL} /* sentinel */
3131};
3132
Jeremy Hylton938ace62002-07-17 16:30:39 +00003133static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003134str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3135
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003136static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003137string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003138{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003139 PyObject *x = NULL;
3140 static char *kwlist[] = {"object", 0};
3141
Guido van Rossumae960af2001-08-30 03:11:59 +00003142 if (type != &PyString_Type)
3143 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003144 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3145 return NULL;
3146 if (x == NULL)
3147 return PyString_FromString("");
3148 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003149}
3150
Guido van Rossumae960af2001-08-30 03:11:59 +00003151static PyObject *
3152str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3153{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003154 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003155 int n;
3156
3157 assert(PyType_IsSubtype(type, &PyString_Type));
3158 tmp = string_new(&PyString_Type, args, kwds);
3159 if (tmp == NULL)
3160 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003161 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003162 n = PyString_GET_SIZE(tmp);
3163 pnew = type->tp_alloc(type, n);
3164 if (pnew != NULL) {
3165 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003166 ((PyStringObject *)pnew)->ob_shash =
3167 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003168 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003169 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003170 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003171 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003172}
3173
Guido van Rossumcacfc072002-05-24 19:01:59 +00003174static PyObject *
3175basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3176{
3177 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003178 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003179 return NULL;
3180}
3181
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003182PyDoc_STRVAR(basestring_doc,
3183"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003184
3185PyTypeObject PyBaseString_Type = {
3186 PyObject_HEAD_INIT(&PyType_Type)
3187 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003188 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003189 0,
3190 0,
3191 0, /* tp_dealloc */
3192 0, /* tp_print */
3193 0, /* tp_getattr */
3194 0, /* tp_setattr */
3195 0, /* tp_compare */
3196 0, /* tp_repr */
3197 0, /* tp_as_number */
3198 0, /* tp_as_sequence */
3199 0, /* tp_as_mapping */
3200 0, /* tp_hash */
3201 0, /* tp_call */
3202 0, /* tp_str */
3203 0, /* tp_getattro */
3204 0, /* tp_setattro */
3205 0, /* tp_as_buffer */
3206 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3207 basestring_doc, /* tp_doc */
3208 0, /* tp_traverse */
3209 0, /* tp_clear */
3210 0, /* tp_richcompare */
3211 0, /* tp_weaklistoffset */
3212 0, /* tp_iter */
3213 0, /* tp_iternext */
3214 0, /* tp_methods */
3215 0, /* tp_members */
3216 0, /* tp_getset */
3217 &PyBaseObject_Type, /* tp_base */
3218 0, /* tp_dict */
3219 0, /* tp_descr_get */
3220 0, /* tp_descr_set */
3221 0, /* tp_dictoffset */
3222 0, /* tp_init */
3223 0, /* tp_alloc */
3224 basestring_new, /* tp_new */
3225 0, /* tp_free */
3226};
3227
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003228PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003229"str(object) -> string\n\
3230\n\
3231Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003232If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003233
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003234PyTypeObject PyString_Type = {
3235 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003236 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003237 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003238 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003239 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003240 (destructor)string_dealloc, /* tp_dealloc */
3241 (printfunc)string_print, /* tp_print */
3242 0, /* tp_getattr */
3243 0, /* tp_setattr */
3244 0, /* tp_compare */
3245 (reprfunc)string_repr, /* tp_repr */
3246 0, /* tp_as_number */
3247 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003248 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003249 (hashfunc)string_hash, /* tp_hash */
3250 0, /* tp_call */
3251 (reprfunc)string_str, /* tp_str */
3252 PyObject_GenericGetAttr, /* tp_getattro */
3253 0, /* tp_setattro */
3254 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00003255 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003256 string_doc, /* tp_doc */
3257 0, /* tp_traverse */
3258 0, /* tp_clear */
3259 (richcmpfunc)string_richcompare, /* tp_richcompare */
3260 0, /* tp_weaklistoffset */
3261 0, /* tp_iter */
3262 0, /* tp_iternext */
3263 string_methods, /* tp_methods */
3264 0, /* tp_members */
3265 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003266 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003267 0, /* tp_dict */
3268 0, /* tp_descr_get */
3269 0, /* tp_descr_set */
3270 0, /* tp_dictoffset */
3271 0, /* tp_init */
3272 0, /* tp_alloc */
3273 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003274 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003275};
3276
3277void
Fred Drakeba096332000-07-09 07:04:36 +00003278PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003279{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003280 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003281 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003282 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003283 if (w == NULL || !PyString_Check(*pv)) {
3284 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003285 *pv = NULL;
3286 return;
3287 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003288 v = string_concat((PyStringObject *) *pv, w);
3289 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003290 *pv = v;
3291}
3292
Guido van Rossum013142a1994-08-30 08:19:36 +00003293void
Fred Drakeba096332000-07-09 07:04:36 +00003294PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003295{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003296 PyString_Concat(pv, w);
3297 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003298}
3299
3300
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003301/* The following function breaks the notion that strings are immutable:
3302 it changes the size of a string. We get away with this only if there
3303 is only one module referencing the object. You can also think of it
3304 as creating a new string object and destroying the old one, only
3305 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003306 already be known to some other part of the code...
3307 Note that if there's not enough memory to resize the string, the original
3308 string object at *pv is deallocated, *pv is set to NULL, an "out of
3309 memory" exception is set, and -1 is returned. Else (on success) 0 is
3310 returned, and the value in *pv may or may not be the same as on input.
3311 As always, an extra byte is allocated for a trailing \0 byte (newsize
3312 does *not* include that), and a trailing \0 byte is stored.
3313*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003314
3315int
Fred Drakeba096332000-07-09 07:04:36 +00003316_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003317{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003318 register PyObject *v;
3319 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003320 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003321 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003322 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003323 Py_DECREF(v);
3324 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003325 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003326 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003327 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003328 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003329 _Py_ForgetReference(v);
3330 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003331 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003332 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003333 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003334 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003335 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003336 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003337 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003338 _Py_NewReference(*pv);
3339 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003340 sv->ob_size = newsize;
3341 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003342 return 0;
3343}
Guido van Rossume5372401993-03-16 12:15:04 +00003344
3345/* Helpers for formatstring */
3346
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003347static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003348getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003349{
3350 int argidx = *p_argidx;
3351 if (argidx < arglen) {
3352 (*p_argidx)++;
3353 if (arglen < 0)
3354 return args;
3355 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003356 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003357 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003358 PyErr_SetString(PyExc_TypeError,
3359 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003360 return NULL;
3361}
3362
/* Bit flags for the format codes of a conversion specifier:
 *	'-'	F_LJUST
 *	'+'	F_SIGN
 *	' '	F_BLANK
 *	'#'	F_ALT
 *	'0'	F_ZERO
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
3375
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003376static int
Fred Drakeba096332000-07-09 07:04:36 +00003377formatfloat(char *buf, size_t buflen, int flags,
3378 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003379{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003380 /* fmt = '%#.' + `prec` + `type`
3381 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003382 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003383 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003384 x = PyFloat_AsDouble(v);
3385 if (x == -1.0 && PyErr_Occurred()) {
3386 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003387 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003388 }
Guido van Rossume5372401993-03-16 12:15:04 +00003389 if (prec < 0)
3390 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003391 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3392 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003393 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3394 (flags&F_ALT) ? "#" : "",
3395 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003396 /* worst case length calc to ensure no buffer overrun:
3397 fmt = %#.<prec>g
3398 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003399 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003400 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3401 If prec=0 the effective precision is 1 (the leading digit is
3402 always given), therefore increase by one to 10+prec. */
3403 if (buflen <= (size_t)10 + (size_t)prec) {
3404 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003405 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003406 return -1;
3407 }
Tim Peters885d4572001-11-28 20:27:42 +00003408 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003409 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003410}
3411
Tim Peters38fd5b62000-09-21 05:43:11 +00003412/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3413 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3414 * Python's regular ints.
3415 * Return value: a new PyString*, or NULL if error.
3416 * . *pbuf is set to point into it,
3417 * *plen set to the # of chars following that.
3418 * Caller must decref it when done using pbuf.
3419 * The string starting at *pbuf is of the form
3420 * "-"? ("0x" | "0X")? digit+
3421 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003422 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003423 * There will be at least prec digits, zero-filled on the left if
3424 * necessary to get that many.
3425 * val object to be converted
3426 * flags bitmask of format flags; only F_ALT is looked at
3427 * prec minimum number of digits; 0-fill on left if needed
3428 * type a character in [duoxX]; u acts the same as d
3429 *
3430 * CAUTION: o, x and X conversions on regular ints can never
3431 * produce a '-' sign, but can for Python's unbounded ints.
3432 */
3433PyObject*
3434_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3435 char **pbuf, int *plen)
3436{
3437 PyObject *result = NULL;
3438 char *buf;
3439 int i;
3440 int sign; /* 1 if '-', else 0 */
3441 int len; /* number of characters */
3442 int numdigits; /* len == numnondigits + numdigits */
3443 int numnondigits = 0;
3444
3445 switch (type) {
3446 case 'd':
3447 case 'u':
3448 result = val->ob_type->tp_str(val);
3449 break;
3450 case 'o':
3451 result = val->ob_type->tp_as_number->nb_oct(val);
3452 break;
3453 case 'x':
3454 case 'X':
3455 numnondigits = 2;
3456 result = val->ob_type->tp_as_number->nb_hex(val);
3457 break;
3458 default:
3459 assert(!"'type' not in [duoxX]");
3460 }
3461 if (!result)
3462 return NULL;
3463
3464 /* To modify the string in-place, there can only be one reference. */
3465 if (result->ob_refcnt != 1) {
3466 PyErr_BadInternalCall();
3467 return NULL;
3468 }
3469 buf = PyString_AsString(result);
3470 len = PyString_Size(result);
3471 if (buf[len-1] == 'L') {
3472 --len;
3473 buf[len] = '\0';
3474 }
3475 sign = buf[0] == '-';
3476 numnondigits += sign;
3477 numdigits = len - numnondigits;
3478 assert(numdigits > 0);
3479
Tim Petersfff53252001-04-12 18:38:48 +00003480 /* Get rid of base marker unless F_ALT */
3481 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003482 /* Need to skip 0x, 0X or 0. */
3483 int skipped = 0;
3484 switch (type) {
3485 case 'o':
3486 assert(buf[sign] == '0');
3487 /* If 0 is only digit, leave it alone. */
3488 if (numdigits > 1) {
3489 skipped = 1;
3490 --numdigits;
3491 }
3492 break;
3493 case 'x':
3494 case 'X':
3495 assert(buf[sign] == '0');
3496 assert(buf[sign + 1] == 'x');
3497 skipped = 2;
3498 numnondigits -= 2;
3499 break;
3500 }
3501 if (skipped) {
3502 buf += skipped;
3503 len -= skipped;
3504 if (sign)
3505 buf[0] = '-';
3506 }
3507 assert(len == numnondigits + numdigits);
3508 assert(numdigits > 0);
3509 }
3510
3511 /* Fill with leading zeroes to meet minimum width. */
3512 if (prec > numdigits) {
3513 PyObject *r1 = PyString_FromStringAndSize(NULL,
3514 numnondigits + prec);
3515 char *b1;
3516 if (!r1) {
3517 Py_DECREF(result);
3518 return NULL;
3519 }
3520 b1 = PyString_AS_STRING(r1);
3521 for (i = 0; i < numnondigits; ++i)
3522 *b1++ = *buf++;
3523 for (i = 0; i < prec - numdigits; i++)
3524 *b1++ = '0';
3525 for (i = 0; i < numdigits; i++)
3526 *b1++ = *buf++;
3527 *b1 = '\0';
3528 Py_DECREF(result);
3529 result = r1;
3530 buf = PyString_AS_STRING(result);
3531 len = numnondigits + prec;
3532 }
3533
3534 /* Fix up case for hex conversions. */
3535 switch (type) {
3536 case 'x':
3537 /* Need to convert all upper case letters to lower case. */
3538 for (i = 0; i < len; i++)
3539 if (buf[i] >= 'A' && buf[i] <= 'F')
3540 buf[i] += 'a'-'A';
3541 break;
3542 case 'X':
3543 /* Need to convert 0x to 0X (and -0x to -0X). */
3544 if (buf[sign + 1] == 'x')
3545 buf[sign + 1] = 'X';
3546 break;
3547 }
3548 *pbuf = buf;
3549 *plen = len;
3550 return result;
3551}
3552
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003553static int
Fred Drakeba096332000-07-09 07:04:36 +00003554formatint(char *buf, size_t buflen, int flags,
3555 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003556{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003557 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003558 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3559 + 1 + 1 = 24 */
3560 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003561 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003562
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003563 x = PyInt_AsLong(v);
3564 if (x == -1 && PyErr_Occurred()) {
3565 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003566 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003567 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003568 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003569 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003570 "%u/%o/%x/%X of negative int will return "
3571 "a signed string in Python 2.4 and up") < 0)
3572 return -1;
3573 }
Guido van Rossume5372401993-03-16 12:15:04 +00003574 if (prec < 0)
3575 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003576
3577 if ((flags & F_ALT) &&
3578 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003579 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003580 * of issues that cause pain:
3581 * - when 0 is being converted, the C standard leaves off
3582 * the '0x' or '0X', which is inconsistent with other
3583 * %#x/%#X conversions and inconsistent with Python's
3584 * hex() function
3585 * - there are platforms that violate the standard and
3586 * convert 0 with the '0x' or '0X'
3587 * (Metrowerks, Compaq Tru64)
3588 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003589 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003590 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003591 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003592 * We can achieve the desired consistency by inserting our
3593 * own '0x' or '0X' prefix, and substituting %x/%X in place
3594 * of %#x/%#X.
3595 *
3596 * Note that this is the same approach as used in
3597 * formatint() in unicodeobject.c
3598 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003599 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003600 type, prec, type);
3601 }
3602 else {
3603 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003604 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003605 prec, type);
3606 }
3607
Tim Peters38fd5b62000-09-21 05:43:11 +00003608 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003609 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3610 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003611 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003612 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003613 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003614 return -1;
3615 }
Tim Peters885d4572001-11-28 20:27:42 +00003616 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003617 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003618}
3619
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003620static int
Fred Drakeba096332000-07-09 07:04:36 +00003621formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003622{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003623 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003624 if (PyString_Check(v)) {
3625 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003626 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003627 }
3628 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003629 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003630 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003631 }
3632 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003633 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003634}
3635
Guido van Rossum013142a1994-08-30 08:19:36 +00003636
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the cast cannot bind to
   whatever surrounds a use of the macro.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003646
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003647PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003648PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003649{
3650 char *fmt, *res;
3651 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003652 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003653 PyObject *result, *orig_args;
3654#ifdef Py_USING_UNICODE
3655 PyObject *v, *w;
3656#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003657 PyObject *dict = NULL;
3658 if (format == NULL || !PyString_Check(format) || args == NULL) {
3659 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003660 return NULL;
3661 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003662 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003663 fmt = PyString_AS_STRING(format);
3664 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003665 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003666 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003667 if (result == NULL)
3668 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003669 res = PyString_AsString(result);
3670 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003671 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003672 argidx = 0;
3673 }
3674 else {
3675 arglen = -1;
3676 argidx = -2;
3677 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003678 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003679 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003680 while (--fmtcnt >= 0) {
3681 if (*fmt != '%') {
3682 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003683 rescnt = fmtcnt + 100;
3684 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003685 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003686 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003687 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003688 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003689 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003690 }
3691 *res++ = *fmt++;
3692 }
3693 else {
3694 /* Got a format specifier */
3695 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003696 int width = -1;
3697 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003698 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003699 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003700 PyObject *v = NULL;
3701 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003702 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003703 int sign;
3704 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003705 char formatbuf[FORMATBUFLEN];
3706 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003707#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003708 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003709 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003710#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003711
Guido van Rossumda9c2711996-12-05 21:58:58 +00003712 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003713 if (*fmt == '(') {
3714 char *keystart;
3715 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003716 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003717 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003718
3719 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003720 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003721 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003722 goto error;
3723 }
3724 ++fmt;
3725 --fmtcnt;
3726 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003727 /* Skip over balanced parentheses */
3728 while (pcount > 0 && --fmtcnt >= 0) {
3729 if (*fmt == ')')
3730 --pcount;
3731 else if (*fmt == '(')
3732 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003733 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003734 }
3735 keylen = fmt - keystart - 1;
3736 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003737 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003738 "incomplete format key");
3739 goto error;
3740 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003741 key = PyString_FromStringAndSize(keystart,
3742 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003743 if (key == NULL)
3744 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003745 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003746 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003747 args_owned = 0;
3748 }
3749 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003750 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003751 if (args == NULL) {
3752 goto error;
3753 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003754 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003755 arglen = -1;
3756 argidx = -2;
3757 }
Guido van Rossume5372401993-03-16 12:15:04 +00003758 while (--fmtcnt >= 0) {
3759 switch (c = *fmt++) {
3760 case '-': flags |= F_LJUST; continue;
3761 case '+': flags |= F_SIGN; continue;
3762 case ' ': flags |= F_BLANK; continue;
3763 case '#': flags |= F_ALT; continue;
3764 case '0': flags |= F_ZERO; continue;
3765 }
3766 break;
3767 }
3768 if (c == '*') {
3769 v = getnextarg(args, arglen, &argidx);
3770 if (v == NULL)
3771 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003772 if (!PyInt_Check(v)) {
3773 PyErr_SetString(PyExc_TypeError,
3774 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003775 goto error;
3776 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003777 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003778 if (width < 0) {
3779 flags |= F_LJUST;
3780 width = -width;
3781 }
Guido van Rossume5372401993-03-16 12:15:04 +00003782 if (--fmtcnt >= 0)
3783 c = *fmt++;
3784 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003785 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003786 width = c - '0';
3787 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003788 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003789 if (!isdigit(c))
3790 break;
3791 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003792 PyErr_SetString(
3793 PyExc_ValueError,
3794 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003795 goto error;
3796 }
3797 width = width*10 + (c - '0');
3798 }
3799 }
3800 if (c == '.') {
3801 prec = 0;
3802 if (--fmtcnt >= 0)
3803 c = *fmt++;
3804 if (c == '*') {
3805 v = getnextarg(args, arglen, &argidx);
3806 if (v == NULL)
3807 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003808 if (!PyInt_Check(v)) {
3809 PyErr_SetString(
3810 PyExc_TypeError,
3811 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003812 goto error;
3813 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003814 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003815 if (prec < 0)
3816 prec = 0;
3817 if (--fmtcnt >= 0)
3818 c = *fmt++;
3819 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003820 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003821 prec = c - '0';
3822 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003823 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003824 if (!isdigit(c))
3825 break;
3826 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003827 PyErr_SetString(
3828 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003829 "prec too big");
3830 goto error;
3831 }
3832 prec = prec*10 + (c - '0');
3833 }
3834 }
3835 } /* prec */
3836 if (fmtcnt >= 0) {
3837 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003838 if (--fmtcnt >= 0)
3839 c = *fmt++;
3840 }
3841 }
3842 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003843 PyErr_SetString(PyExc_ValueError,
3844 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003845 goto error;
3846 }
3847 if (c != '%') {
3848 v = getnextarg(args, arglen, &argidx);
3849 if (v == NULL)
3850 goto error;
3851 }
3852 sign = 0;
3853 fill = ' ';
3854 switch (c) {
3855 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003856 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003857 len = 1;
3858 break;
3859 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003860 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003861#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003862 if (PyUnicode_Check(v)) {
3863 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003864 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003865 goto unicode;
3866 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003867#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003868 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003869 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003870 else
3871 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003872 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003873 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003874 if (!PyString_Check(temp)) {
3875 PyErr_SetString(PyExc_TypeError,
3876 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003877 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003878 goto error;
3879 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003880 pbuf = PyString_AS_STRING(temp);
3881 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003882 if (prec >= 0 && len > prec)
3883 len = prec;
3884 break;
3885 case 'i':
3886 case 'd':
3887 case 'u':
3888 case 'o':
3889 case 'x':
3890 case 'X':
3891 if (c == 'i')
3892 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003893 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003894 temp = _PyString_FormatLong(v, flags,
3895 prec, c, &pbuf, &len);
3896 if (!temp)
3897 goto error;
3898 /* unbounded ints can always produce
3899 a sign character! */
3900 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003901 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003902 else {
3903 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003904 len = formatint(pbuf,
3905 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003906 flags, prec, c, v);
3907 if (len < 0)
3908 goto error;
3909 /* only d conversion is signed */
3910 sign = c == 'd';
3911 }
3912 if (flags & F_ZERO)
3913 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003914 break;
3915 case 'e':
3916 case 'E':
3917 case 'f':
3918 case 'g':
3919 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003920 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003921 len = formatfloat(pbuf, sizeof(formatbuf),
3922 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003923 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003924 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003925 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003926 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003927 fill = '0';
3928 break;
3929 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003930 pbuf = formatbuf;
3931 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003932 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003933 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003934 break;
3935 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003936 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003937 "unsupported format character '%c' (0x%x) "
3938 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00003939 c, c,
3940 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00003941 goto error;
3942 }
3943 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003944 if (*pbuf == '-' || *pbuf == '+') {
3945 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003946 len--;
3947 }
3948 else if (flags & F_SIGN)
3949 sign = '+';
3950 else if (flags & F_BLANK)
3951 sign = ' ';
3952 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003953 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003954 }
3955 if (width < len)
3956 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003957 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003958 reslen -= rescnt;
3959 rescnt = width + fmtcnt + 100;
3960 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003961 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003962 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003963 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003964 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003965 }
3966 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003967 if (fill != ' ')
3968 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003969 rescnt--;
3970 if (width > len)
3971 width--;
3972 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003973 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3974 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003975 assert(pbuf[1] == c);
3976 if (fill != ' ') {
3977 *res++ = *pbuf++;
3978 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003979 }
Tim Petersfff53252001-04-12 18:38:48 +00003980 rescnt -= 2;
3981 width -= 2;
3982 if (width < 0)
3983 width = 0;
3984 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003985 }
3986 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003987 do {
3988 --rescnt;
3989 *res++ = fill;
3990 } while (--width > len);
3991 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003992 if (fill == ' ') {
3993 if (sign)
3994 *res++ = sign;
3995 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003996 (c == 'x' || c == 'X')) {
3997 assert(pbuf[0] == '0');
3998 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003999 *res++ = *pbuf++;
4000 *res++ = *pbuf++;
4001 }
4002 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004003 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004004 res += len;
4005 rescnt -= len;
4006 while (--width >= len) {
4007 --rescnt;
4008 *res++ = ' ';
4009 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004010 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004011 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004012 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004013 goto error;
4014 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004015 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004016 } /* '%' */
4017 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004018 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004019 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004020 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004021 goto error;
4022 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004023 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004024 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004025 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004026 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004027 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004028
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004029#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004030 unicode:
4031 if (args_owned) {
4032 Py_DECREF(args);
4033 args_owned = 0;
4034 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004035 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004036 if (PyTuple_Check(orig_args) && argidx > 0) {
4037 PyObject *v;
4038 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4039 v = PyTuple_New(n);
4040 if (v == NULL)
4041 goto error;
4042 while (--n >= 0) {
4043 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4044 Py_INCREF(w);
4045 PyTuple_SET_ITEM(v, n, w);
4046 }
4047 args = v;
4048 } else {
4049 Py_INCREF(orig_args);
4050 args = orig_args;
4051 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004052 args_owned = 1;
4053 /* Take what we have of the result and let the Unicode formatting
4054 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004055 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004056 if (_PyString_Resize(&result, rescnt))
4057 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004058 fmtcnt = PyString_GET_SIZE(format) - \
4059 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004060 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4061 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004062 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004063 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004064 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004065 if (v == NULL)
4066 goto error;
4067 /* Paste what we have (result) to what the Unicode formatting
4068 function returned (v) and return the result (or error) */
4069 w = PyUnicode_Concat(result, v);
4070 Py_DECREF(result);
4071 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004072 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004073 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004074#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004075
Guido van Rossume5372401993-03-16 12:15:04 +00004076 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004077 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004078 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004080 }
Guido van Rossume5372401993-03-16 12:15:04 +00004081 return NULL;
4082}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004083
Guido van Rossum2a61e741997-01-18 07:55:05 +00004084void
Fred Drakeba096332000-07-09 07:04:36 +00004085PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004086{
4087 register PyStringObject *s = (PyStringObject *)(*p);
4088 PyObject *t;
4089 if (s == NULL || !PyString_Check(s))
4090 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004091 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004092 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004093 if (interned == NULL) {
4094 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004095 if (interned == NULL) {
4096 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004097 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004098 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004099 }
4100 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4101 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004102 Py_DECREF(*p);
4103 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004104 return;
4105 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004106 /* Ensure that only true string objects appear in the intern dict */
4107 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004108 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4109 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004110 if (t == NULL) {
4111 PyErr_Clear();
4112 return;
Tim Peters111f6092001-09-12 07:54:51 +00004113 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004114 } else {
4115 t = (PyObject*) s;
4116 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004117 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004118
4119 if (PyDict_SetItem(interned, t, t) == 0) {
4120 /* The two references in interned are not counted by
4121 refcnt. The string deallocator will take care of this */
4122 ((PyObject *)t)->ob_refcnt-=2;
4123 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4124 Py_DECREF(*p);
4125 *p = t;
4126 return;
4127 }
4128 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004129 PyErr_Clear();
4130}
4131
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004132void
4133PyString_InternImmortal(PyObject **p)
4134{
4135 PyString_InternInPlace(p);
4136 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4137 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4138 Py_INCREF(*p);
4139 }
4140}
4141
Guido van Rossum2a61e741997-01-18 07:55:05 +00004142
4143PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004144PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004145{
4146 PyObject *s = PyString_FromString(cp);
4147 if (s == NULL)
4148 return NULL;
4149 PyString_InternInPlace(&s);
4150 return s;
4151}
4152
Guido van Rossum8cf04761997-08-02 02:57:45 +00004153void
Fred Drakeba096332000-07-09 07:04:36 +00004154PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004155{
4156 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004157 for (i = 0; i < UCHAR_MAX + 1; i++) {
4158 Py_XDECREF(characters[i]);
4159 characters[i] = NULL;
4160 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004161 Py_XDECREF(nullstring);
4162 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004163}
Barry Warsawa903ad982001-02-23 16:40:48 +00004164
Barry Warsawa903ad982001-02-23 16:40:48 +00004165void _Py_ReleaseInternedStrings(void)
4166{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004167 PyObject *keys;
4168 PyStringObject *s;
4169 int i, n;
4170
4171 if (interned == NULL || !PyDict_Check(interned))
4172 return;
4173 keys = PyDict_Keys(interned);
4174 if (keys == NULL || !PyList_Check(keys)) {
4175 PyErr_Clear();
4176 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004177 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004178
4179 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4180 detector, interned strings are not forcibly deallocated; rather, we
4181 give them their stolen references back, and then clear and DECREF
4182 the interned dict. */
4183
4184 fprintf(stderr, "releasing interned strings\n");
4185 n = PyList_GET_SIZE(keys);
4186 for (i = 0; i < n; i++) {
4187 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4188 switch (s->ob_sstate) {
4189 case SSTATE_NOT_INTERNED:
4190 /* XXX Shouldn't happen */
4191 break;
4192 case SSTATE_INTERNED_IMMORTAL:
4193 s->ob_refcnt += 1;
4194 break;
4195 case SSTATE_INTERNED_MORTAL:
4196 s->ob_refcnt += 2;
4197 break;
4198 default:
4199 Py_FatalError("Inconsistent interned string state.");
4200 }
4201 s->ob_sstate = SSTATE_NOT_INTERNED;
4202 }
4203 Py_DECREF(keys);
4204 PyDict_Clear(interned);
4205 Py_DECREF(interned);
4206 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004207}