blob: b399415e4e206f8aa08e4b230b406f90760b7bd7 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
22static PyObject *interned;
23
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000108 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000157 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000184 /* likewise for %zd */
185 if (*f == 'z' && *(f+1) == 'd')
Tim Petersae1d0c92006-03-17 03:29:34 +0000186 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000187
Barry Warsawdadace02001-08-24 18:32:06 +0000188 switch (*f) {
189 case 'c':
190 (void)va_arg(count, int);
191 /* fall through... */
192 case '%':
193 n++;
194 break;
195 case 'd': case 'i': case 'x':
196 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000197 /* 20 bytes is enough to hold a 64-bit
198 integer. Decimal takes the most space.
199 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000200 n += 20;
201 break;
202 case 's':
203 s = va_arg(count, char*);
204 n += strlen(s);
205 break;
206 case 'p':
207 (void) va_arg(count, int);
208 /* maximum 64-bit pointer representation:
209 * 0xffffffffffffffff
210 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000211 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000212 */
213 n += 19;
214 break;
215 default:
216 /* if we stumble upon an unknown
217 formatting code, copy the rest of
218 the format string to the output
219 string. (we cannot just skip the
220 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000221 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000222 n += strlen(p);
223 goto expand;
224 }
225 } else
226 n++;
227 }
228 expand:
229 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000230 /* Since we've analyzed how much space we need for the worst case,
231 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 string = PyString_FromStringAndSize(NULL, n);
233 if (!string)
234 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000235
Barry Warsawdadace02001-08-24 18:32:06 +0000236 s = PyString_AsString(string);
237
238 for (f = format; *f; f++) {
239 if (*f == '%') {
240 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 Py_ssize_t i;
242 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000243 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 /* handle the size_t flag. */
264 if (*f == 'z' && *(f+1) == 'd') {
265 size_tflag = 1;
266 ++f;
267 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000268
Barry Warsawdadace02001-08-24 18:32:06 +0000269 switch (*f) {
270 case 'c':
271 *s++ = va_arg(vargs, int);
272 break;
273 case 'd':
274 if (longflag)
275 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000276 else if (size_tflag)
Neal Norwitz7fbd6912006-03-25 23:55:39 +0000277 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
Tim Petersae1d0c92006-03-17 03:29:34 +0000278 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000279 else
280 sprintf(s, "%d", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 'i':
284 sprintf(s, "%i", va_arg(vargs, int));
285 s += strlen(s);
286 break;
287 case 'x':
288 sprintf(s, "%x", va_arg(vargs, int));
289 s += strlen(s);
290 break;
291 case 's':
292 p = va_arg(vargs, char*);
293 i = strlen(p);
294 if (n > 0 && i > n)
295 i = n;
296 memcpy(s, p, i);
297 s += i;
298 break;
299 case 'p':
300 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000301 /* %p is ill-defined: ensure leading 0x. */
302 if (s[1] == 'X')
303 s[1] = 'x';
304 else if (s[1] != 'x') {
305 memmove(s+2, s, strlen(s)+1);
306 s[0] = '0';
307 s[1] = 'x';
308 }
Barry Warsawdadace02001-08-24 18:32:06 +0000309 s += strlen(s);
310 break;
311 case '%':
312 *s++ = '%';
313 break;
314 default:
315 strcpy(s, p);
316 s += strlen(s);
317 goto end;
318 }
319 } else
320 *s++ = *f;
321 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000322
Barry Warsawdadace02001-08-24 18:32:06 +0000323 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000324 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000325 return string;
326}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000327
Barry Warsawdadace02001-08-24 18:32:06 +0000328PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000330{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000332 va_list vargs;
333
334#ifdef HAVE_STDARG_PROTOTYPES
335 va_start(vargs, format);
336#else
337 va_start(vargs);
338#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000339 ret = PyString_FromFormatV(format, vargs);
340 va_end(vargs);
341 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342}
343
344
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000345PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000346 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000347 const char *encoding,
348 const char *errors)
349{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000350 PyObject *v, *str;
351
352 str = PyString_FromStringAndSize(s, size);
353 if (str == NULL)
354 return NULL;
355 v = PyString_AsDecodedString(str, encoding, errors);
356 Py_DECREF(str);
357 return v;
358}
359
360PyObject *PyString_AsDecodedObject(PyObject *str,
361 const char *encoding,
362 const char *errors)
363{
364 PyObject *v;
365
366 if (!PyString_Check(str)) {
367 PyErr_BadArgument();
368 goto onError;
369 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000370
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000371 if (encoding == NULL) {
372#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000373 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000374#else
375 PyErr_SetString(PyExc_ValueError, "no encoding specified");
376 goto onError;
377#endif
378 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379
380 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000381 v = PyCodec_Decode(str, encoding, errors);
382 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000384
385 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000386
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000387 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 return NULL;
389}
390
391PyObject *PyString_AsDecodedString(PyObject *str,
392 const char *encoding,
393 const char *errors)
394{
395 PyObject *v;
396
397 v = PyString_AsDecodedObject(str, encoding, errors);
398 if (v == NULL)
399 goto onError;
400
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000401#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000402 /* Convert Unicode to a string using the default encoding */
403 if (PyUnicode_Check(v)) {
404 PyObject *temp = v;
405 v = PyUnicode_AsEncodedString(v, NULL, NULL);
406 Py_DECREF(temp);
407 if (v == NULL)
408 goto onError;
409 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000410#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411 if (!PyString_Check(v)) {
412 PyErr_Format(PyExc_TypeError,
413 "decoder did not return a string object (type=%.400s)",
414 v->ob_type->tp_name);
415 Py_DECREF(v);
416 goto onError;
417 }
418
419 return v;
420
421 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000422 return NULL;
423}
424
425PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000426 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 const char *encoding,
428 const char *errors)
429{
430 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000431
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 str = PyString_FromStringAndSize(s, size);
433 if (str == NULL)
434 return NULL;
435 v = PyString_AsEncodedString(str, encoding, errors);
436 Py_DECREF(str);
437 return v;
438}
439
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000440PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 const char *encoding,
442 const char *errors)
443{
444 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000445
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446 if (!PyString_Check(str)) {
447 PyErr_BadArgument();
448 goto onError;
449 }
450
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000451 if (encoding == NULL) {
452#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454#else
455 PyErr_SetString(PyExc_ValueError, "no encoding specified");
456 goto onError;
457#endif
458 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459
460 /* Encode via the codec registry */
461 v = PyCodec_Encode(str, encoding, errors);
462 if (v == NULL)
463 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000464
465 return v;
466
467 onError:
468 return NULL;
469}
470
471PyObject *PyString_AsEncodedString(PyObject *str,
472 const char *encoding,
473 const char *errors)
474{
475 PyObject *v;
476
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000477 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000478 if (v == NULL)
479 goto onError;
480
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000481#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000482 /* Convert Unicode to a string using the default encoding */
483 if (PyUnicode_Check(v)) {
484 PyObject *temp = v;
485 v = PyUnicode_AsEncodedString(v, NULL, NULL);
486 Py_DECREF(temp);
487 if (v == NULL)
488 goto onError;
489 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000490#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 if (!PyString_Check(v)) {
492 PyErr_Format(PyExc_TypeError,
493 "encoder did not return a string object (type=%.400s)",
494 v->ob_type->tp_name);
495 Py_DECREF(v);
496 goto onError;
497 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000498
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000499 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000500
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 onError:
502 return NULL;
503}
504
Guido van Rossum234f9421993-06-17 12:35:49 +0000505static void
Fred Drakeba096332000-07-09 07:04:36 +0000506string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000507{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000508 switch (PyString_CHECK_INTERNED(op)) {
509 case SSTATE_NOT_INTERNED:
510 break;
511
512 case SSTATE_INTERNED_MORTAL:
513 /* revive dead object temporarily for DelItem */
514 op->ob_refcnt = 3;
515 if (PyDict_DelItem(interned, op) != 0)
516 Py_FatalError(
517 "deletion of interned string failed");
518 break;
519
520 case SSTATE_INTERNED_IMMORTAL:
521 Py_FatalError("Immortal interned string died.");
522
523 default:
524 Py_FatalError("Inconsistent interned string state.");
525 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000526 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000527}
528
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000529/* Unescape a backslash-escaped string. If unicode is non-zero,
530 the string is a u-literal. If recode_encoding is non-zero,
531 the string is UTF-8 encoded and should be re-encoded in the
532 specified encoding. */
533
534PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000535 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000536 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000537 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 const char *recode_encoding)
539{
540 int c;
541 char *p, *buf;
542 const char *end;
543 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000544 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000545 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 if (v == NULL)
547 return NULL;
548 p = buf = PyString_AsString(v);
549 end = s + len;
550 while (s < end) {
551 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000552 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000553#ifdef Py_USING_UNICODE
554 if (recode_encoding && (*s & 0x80)) {
555 PyObject *u, *w;
556 char *r;
557 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000558 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000559 t = s;
560 /* Decode non-ASCII bytes as UTF-8. */
561 while (t < end && (*t & 0x80)) t++;
562 u = PyUnicode_DecodeUTF8(s, t - s, errors);
563 if(!u) goto failed;
564
565 /* Recode them in target encoding. */
566 w = PyUnicode_AsEncodedString(
567 u, recode_encoding, errors);
568 Py_DECREF(u);
569 if (!w) goto failed;
570
571 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000572 assert(PyString_Check(w));
573 r = PyString_AS_STRING(w);
574 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575 memcpy(p, r, rn);
576 p += rn;
577 Py_DECREF(w);
578 s = t;
579 } else {
580 *p++ = *s++;
581 }
582#else
583 *p++ = *s++;
584#endif
585 continue;
586 }
587 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000588 if (s==end) {
589 PyErr_SetString(PyExc_ValueError,
590 "Trailing \\ in string");
591 goto failed;
592 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000593 switch (*s++) {
594 /* XXX This assumes ASCII! */
595 case '\n': break;
596 case '\\': *p++ = '\\'; break;
597 case '\'': *p++ = '\''; break;
598 case '\"': *p++ = '\"'; break;
599 case 'b': *p++ = '\b'; break;
600 case 'f': *p++ = '\014'; break; /* FF */
601 case 't': *p++ = '\t'; break;
602 case 'n': *p++ = '\n'; break;
603 case 'r': *p++ = '\r'; break;
604 case 'v': *p++ = '\013'; break; /* VT */
605 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
606 case '0': case '1': case '2': case '3':
607 case '4': case '5': case '6': case '7':
608 c = s[-1] - '0';
609 if ('0' <= *s && *s <= '7') {
610 c = (c<<3) + *s++ - '0';
611 if ('0' <= *s && *s <= '7')
612 c = (c<<3) + *s++ - '0';
613 }
614 *p++ = c;
615 break;
616 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000617 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000618 && isxdigit(Py_CHARMASK(s[1]))) {
619 unsigned int x = 0;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x = c - '0';
624 else if (islower(c))
625 x = 10 + c - 'a';
626 else
627 x = 10 + c - 'A';
628 x = x << 4;
629 c = Py_CHARMASK(*s);
630 s++;
631 if (isdigit(c))
632 x += c - '0';
633 else if (islower(c))
634 x += 10 + c - 'a';
635 else
636 x += 10 + c - 'A';
637 *p++ = x;
638 break;
639 }
640 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000641 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000642 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000643 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000644 }
645 if (strcmp(errors, "replace") == 0) {
646 *p++ = '?';
647 } else if (strcmp(errors, "ignore") == 0)
648 /* do nothing */;
649 else {
650 PyErr_Format(PyExc_ValueError,
651 "decoding error; "
652 "unknown error handling code: %.400s",
653 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656#ifndef Py_USING_UNICODE
657 case 'u':
658 case 'U':
659 case 'N':
660 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000661 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 "Unicode escapes not legal "
663 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#endif
667 default:
668 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000669 s--;
670 goto non_esc; /* an arbitry number of unescaped
671 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 }
673 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000674 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000675 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 return v;
677 failed:
678 Py_DECREF(v);
679 return NULL;
680}
681
Martin v. Löwis18e16552006-02-15 17:27:45 +0000682static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000683string_getsize(register PyObject *op)
684{
685 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000687 if (PyString_AsStringAndSize(op, &s, &len))
688 return -1;
689 return len;
690}
691
692static /*const*/ char *
693string_getbuffer(register PyObject *op)
694{
695 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000696 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697 if (PyString_AsStringAndSize(op, &s, &len))
698 return NULL;
699 return s;
700}
701
Martin v. Löwis18e16552006-02-15 17:27:45 +0000702Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000703PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705 if (!PyString_Check(op))
706 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000707 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000708}
709
710/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000711PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000713 if (!PyString_Check(op))
714 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000715 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716}
717
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718int
719PyString_AsStringAndSize(register PyObject *obj,
720 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000721 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000722{
723 if (s == NULL) {
724 PyErr_BadInternalCall();
725 return -1;
726 }
727
728 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000729#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 if (PyUnicode_Check(obj)) {
731 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
732 if (obj == NULL)
733 return -1;
734 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000735 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000736#endif
737 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000738 PyErr_Format(PyExc_TypeError,
739 "expected string or Unicode object, "
740 "%.200s found", obj->ob_type->tp_name);
741 return -1;
742 }
743 }
744
745 *s = PyString_AS_STRING(obj);
746 if (len != NULL)
747 *len = PyString_GET_SIZE(obj);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000748 else if (strlen(*s) != PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 PyErr_SetString(PyExc_TypeError,
750 "expected string without null bytes");
751 return -1;
752 }
753 return 0;
754}
755
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756/* Methods */
757
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000758static int
Fred Drakeba096332000-07-09 07:04:36 +0000759string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000760{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000761 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000762 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000763 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000764
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000765 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000766 if (! PyString_CheckExact(op)) {
767 int ret;
768 /* A str subclass may have its own __str__ method. */
769 op = (PyStringObject *) PyObject_Str((PyObject *)op);
770 if (op == NULL)
771 return -1;
772 ret = string_print(op, fp, flags);
773 Py_DECREF(op);
774 return ret;
775 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000776 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000777#ifdef __VMS
778 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
779#else
780 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
781#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000782 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000784
Thomas Wouters7e474022000-07-16 12:04:32 +0000785 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000786 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000787 if (memchr(op->ob_sval, '\'', op->ob_size) &&
788 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000789 quote = '"';
790
791 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 for (i = 0; i < op->ob_size; i++) {
793 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000795 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000796 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000797 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000798 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000799 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000800 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000801 fprintf(fp, "\\r");
802 else if (c < ' ' || c >= 0x7f)
803 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000804 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000805 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000806 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000807 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000808 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809}
810
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000811PyObject *
812PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000814 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000815 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000816 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000817 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000818 PyErr_SetString(PyExc_OverflowError,
819 "string is too large to make repr");
820 }
821 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000823 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000824 }
825 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000826 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 register char c;
828 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 int quote;
830
Thomas Wouters7e474022000-07-16 12:04:32 +0000831 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000833 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000834 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000835 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 quote = '"';
837
Tim Peters9161c8b2001-12-03 01:55:38 +0000838 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000840 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000841 /* There's at least enough room for a hex escape
842 and a closing quote. */
843 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000845 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000846 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\t')
848 *p++ = '\\', *p++ = 't';
849 else if (c == '\n')
850 *p++ = '\\', *p++ = 'n';
851 else if (c == '\r')
852 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000853 else if (c < ' ' || c >= 0x7f) {
854 /* For performance, we don't want to call
855 PyOS_snprintf here (extra layers of
856 function call). */
857 sprintf(p, "\\x%02x", c & 0xff);
858 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000859 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000860 else
861 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000862 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000863 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000864 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000866 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000867 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000868 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870}
871
Guido van Rossum189f1df2001-05-01 16:51:53 +0000872static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000873string_repr(PyObject *op)
874{
875 return PyString_Repr(op, 1);
876}
877
878static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000879string_str(PyObject *s)
880{
Tim Petersc9933152001-10-16 20:18:24 +0000881 assert(PyString_Check(s));
882 if (PyString_CheckExact(s)) {
883 Py_INCREF(s);
884 return s;
885 }
886 else {
887 /* Subtype -- return genuine string with the same value. */
888 PyStringObject *t = (PyStringObject *) s;
889 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
890 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000891}
892
Martin v. Löwis18e16552006-02-15 17:27:45 +0000893static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000894string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895{
896 return a->ob_size;
897}
898
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000899static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000900string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000902 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000903 register PyStringObject *op;
904 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000905#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000906 if (PyUnicode_Check(bb))
907 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000908#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000909 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000910 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000911 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912 return NULL;
913 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000916 if ((a->ob_size == 0 || b->ob_size == 0) &&
917 PyString_CheckExact(a) && PyString_CheckExact(b)) {
918 if (a->ob_size == 0) {
919 Py_INCREF(bb);
920 return bb;
921 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922 Py_INCREF(a);
923 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000924 }
925 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000926 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000927 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000928 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000929 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000931 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000932 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000933 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000934 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
935 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000936 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000937 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000938#undef b
939}
940
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000942string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000944 register Py_ssize_t i;
945 register Py_ssize_t j;
946 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000948 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949 if (n < 0)
950 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000951 /* watch out for overflows: the size can overflow int,
952 * and the # of bytes needed can overflow size_t
953 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000955 if (n && size / n != a->ob_size) {
956 PyErr_SetString(PyExc_OverflowError,
957 "repeated string is too long");
958 return NULL;
959 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000960 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000961 Py_INCREF(a);
962 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000963 }
Tim Peterse7c05322004-06-27 17:24:49 +0000964 nbytes = (size_t)size;
965 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000966 PyErr_SetString(PyExc_OverflowError,
967 "repeated string is too long");
968 return NULL;
969 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000971 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000972 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000973 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000974 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000975 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000976 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000977 op->ob_sval[size] = '\0';
978 if (a->ob_size == 1 && n > 0) {
979 memset(op->ob_sval, a->ob_sval[0] , n);
980 return (PyObject *) op;
981 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000982 i = 0;
983 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000984 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
985 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000986 }
987 while (i < size) {
988 j = (i <= size-i) ? i : size-i;
989 memcpy(op->ob_sval+i, op->ob_sval, j);
990 i += j;
991 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000992 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993}
994
995/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
996
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +0000998string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000999 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001000 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001{
1002 if (i < 0)
1003 i = 0;
1004 if (j < 0)
1005 j = 0; /* Avoid signed/unsigned bug in next line */
1006 if (j > a->ob_size)
1007 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001008 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1009 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001010 Py_INCREF(a);
1011 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001012 }
1013 if (j < i)
1014 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001015 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001016}
1017
Guido van Rossum9284a572000-03-07 15:53:43 +00001018static int
Fred Drakeba096332000-07-09 07:04:36 +00001019string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001020{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001021 char *s = PyString_AS_STRING(a);
1022 const char *sub = PyString_AS_STRING(el);
1023 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001024 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001025 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001026 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001027
1028 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001029#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001030 if (PyUnicode_Check(el))
1031 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001032#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001033 if (!PyString_Check(el)) {
1034 PyErr_SetString(PyExc_TypeError,
1035 "'in <string>' requires string as left operand");
1036 return -1;
1037 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001038 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001039
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001040 if (len_sub == 0)
1041 return 1;
Tim Petersae1d0c92006-03-17 03:29:34 +00001042 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001043 substring. When s<last, there is still room for a possible match
1044 and s[0] through s[len_sub-1] will be in bounds.
1045 shortsub is len_sub minus the last character which is checked
1046 separately just before the memcmp(). That check helps prevent
1047 false starts and saves the setup time for memcmp().
1048 */
1049 firstchar = sub[0];
1050 shortsub = len_sub - 1;
1051 lastchar = sub[shortsub];
1052 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1053 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001054 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001055 if (s == NULL)
1056 return 0;
1057 assert(s < last);
1058 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001059 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001060 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001061 }
1062 return 0;
1063}
1064
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001065static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001066string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001068 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001069 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001071 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001072 return NULL;
1073 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001074 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001075 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001076 if (v == NULL)
1077 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001078 else {
1079#ifdef COUNT_ALLOCS
1080 one_strings++;
1081#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001082 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001083 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001084 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001085}
1086
Martin v. Löwiscd353062001-05-24 16:56:35 +00001087static PyObject*
1088string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001089{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001090 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001091 Py_ssize_t len_a, len_b;
1092 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001093 PyObject *result;
1094
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001095 /* Make sure both arguments are strings. */
1096 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001097 result = Py_NotImplemented;
1098 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001099 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 if (a == b) {
1101 switch (op) {
1102 case Py_EQ:case Py_LE:case Py_GE:
1103 result = Py_True;
1104 goto out;
1105 case Py_NE:case Py_LT:case Py_GT:
1106 result = Py_False;
1107 goto out;
1108 }
1109 }
1110 if (op == Py_EQ) {
1111 /* Supporting Py_NE here as well does not save
1112 much time, since Py_NE is rarely used. */
1113 if (a->ob_size == b->ob_size
1114 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001115 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116 a->ob_size) == 0)) {
1117 result = Py_True;
1118 } else {
1119 result = Py_False;
1120 }
1121 goto out;
1122 }
1123 len_a = a->ob_size; len_b = b->ob_size;
1124 min_len = (len_a < len_b) ? len_a : len_b;
1125 if (min_len > 0) {
1126 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1127 if (c==0)
1128 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1129 }else
1130 c = 0;
1131 if (c == 0)
1132 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1133 switch (op) {
1134 case Py_LT: c = c < 0; break;
1135 case Py_LE: c = c <= 0; break;
1136 case Py_EQ: assert(0); break; /* unreachable */
1137 case Py_NE: c = c != 0; break;
1138 case Py_GT: c = c > 0; break;
1139 case Py_GE: c = c >= 0; break;
1140 default:
1141 result = Py_NotImplemented;
1142 goto out;
1143 }
1144 result = c ? Py_True : Py_False;
1145 out:
1146 Py_INCREF(result);
1147 return result;
1148}
1149
1150int
1151_PyString_Eq(PyObject *o1, PyObject *o2)
1152{
1153 PyStringObject *a, *b;
1154 a = (PyStringObject*)o1;
1155 b = (PyStringObject*)o2;
1156 return a->ob_size == b->ob_size
1157 && *a->ob_sval == *b->ob_sval
1158 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001159}
1160
Guido van Rossum9bfef441993-03-29 10:43:31 +00001161static long
Fred Drakeba096332000-07-09 07:04:36 +00001162string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001163{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001164 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001165 register unsigned char *p;
1166 register long x;
1167
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001168 if (a->ob_shash != -1)
1169 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001170 len = a->ob_size;
1171 p = (unsigned char *) a->ob_sval;
1172 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001173 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001174 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001175 x ^= a->ob_size;
1176 if (x == -1)
1177 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001178 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001179 return x;
1180}
1181
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001182#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1183
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001184static PyObject*
1185string_subscript(PyStringObject* self, PyObject* item)
1186{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001187 PyNumberMethods *nb = item->ob_type->tp_as_number;
1188 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1189 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001190 if (i == -1 && PyErr_Occurred())
1191 return NULL;
1192 if (i < 0)
1193 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001194 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001195 }
1196 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001197 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001198 char* source_buf;
1199 char* result_buf;
1200 PyObject* result;
1201
Tim Petersae1d0c92006-03-17 03:29:34 +00001202 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001203 PyString_GET_SIZE(self),
1204 &start, &stop, &step, &slicelength) < 0) {
1205 return NULL;
1206 }
1207
1208 if (slicelength <= 0) {
1209 return PyString_FromStringAndSize("", 0);
1210 }
1211 else {
1212 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001213 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001214 if (result_buf == NULL)
1215 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001216
Tim Petersae1d0c92006-03-17 03:29:34 +00001217 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001218 cur += step, i++) {
1219 result_buf[i] = source_buf[cur];
1220 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001221
1222 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 slicelength);
1224 PyMem_Free(result_buf);
1225 return result;
1226 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001227 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001229 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230 "string indices must be integers");
1231 return NULL;
1232 }
1233}
1234
Martin v. Löwis18e16552006-02-15 17:27:45 +00001235static Py_ssize_t
1236string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001237{
1238 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001239 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001240 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001241 return -1;
1242 }
1243 *ptr = (void *)self->ob_sval;
1244 return self->ob_size;
1245}
1246
Martin v. Löwis18e16552006-02-15 17:27:45 +00001247static Py_ssize_t
1248string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001249{
Guido van Rossum045e6881997-09-08 18:30:11 +00001250 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001251 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252 return -1;
1253}
1254
Martin v. Löwis18e16552006-02-15 17:27:45 +00001255static Py_ssize_t
1256string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257{
1258 if ( lenp )
1259 *lenp = self->ob_size;
1260 return 1;
1261}
1262
Martin v. Löwis18e16552006-02-15 17:27:45 +00001263static Py_ssize_t
1264string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001265{
1266 if ( index != 0 ) {
1267 PyErr_SetString(PyExc_SystemError,
1268 "accessing non-existent string segment");
1269 return -1;
1270 }
1271 *ptr = self->ob_sval;
1272 return self->ob_size;
1273}
1274
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001275static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001276 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001277 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001278 (ssizeargfunc)string_repeat, /*sq_repeat*/
1279 (ssizeargfunc)string_item, /*sq_item*/
1280 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001281 0, /*sq_ass_item*/
1282 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001283 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001284};
1285
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001286static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001287 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001288 (binaryfunc)string_subscript,
1289 0,
1290};
1291
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001292static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (readbufferproc)string_buffer_getreadbuf,
1294 (writebufferproc)string_buffer_getwritebuf,
1295 (segcountproc)string_buffer_getsegcount,
1296 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297};
1298
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299
1300
/* Selector values for the three strip variants. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-format strings, indexed by the selector values above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string with its leading
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001309
/* SPLIT_APPEND(data, left, right): append data[left:right] to `list' as
   a new string object.

   The expansion relies on names in the calling function: a PyObject*
   variable `str' used as scratch, the target `list', and a label
   `onError' that is jumped to when creating the string or appending it
   fails.  The reference to the new string is always released.

   Wrapped in do { } while (0) so the macro behaves as a single statement;
   invoke it with a trailing semicolon. */
#define SPLIT_APPEND(data, left, right) \
	do { \
		str = PyString_FromStringAndSize((data) + (left), \
						 (right) - (left)); \
		if (str == NULL) \
			goto onError; \
		if (PyList_Append(list, str)) { \
			Py_DECREF(str); \
			goto onError; \
		} \
		Py_DECREF(str); \
	} while (0)
1321
/* SPLIT_INSERT(data, left, right): insert data[left:right] at the front
   of `list' as a new string object.

   The expansion relies on names in the calling function: a PyObject*
   variable `str' used as scratch, the target `list', and a label
   `onError' that is jumped to when creating the string or inserting it
   fails.  The reference to the new string is always released.

   Wrapped in do { } while (0) so the macro behaves as a single statement;
   invoke it with a trailing semicolon. */
#define SPLIT_INSERT(data, left, right) \
	do { \
		str = PyString_FromStringAndSize((data) + (left), \
						 (right) - (left)); \
		if (str == NULL) \
			goto onError; \
		if (PyList_Insert(list, 0, str)) { \
			Py_DECREF(str); \
			goto onError; \
		} \
		Py_DECREF(str); \
	} while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333
1334static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001335split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001337 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001338 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339 PyObject *list = PyList_New(0);
1340
1341 if (list == NULL)
1342 return NULL;
1343
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 for (i = j = 0; i < len; ) {
1345 while (i < len && isspace(Py_CHARMASK(s[i])))
1346 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 while (i < len && !isspace(Py_CHARMASK(s[i])))
1349 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001351 if (maxsplit-- <= 0)
1352 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001353 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001354 while (i < len && isspace(Py_CHARMASK(s[i])))
1355 i++;
1356 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001357 }
1358 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001360 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001363 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364 Py_DECREF(list);
1365 return NULL;
1366}
1367
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001368static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001369split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001370{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001371 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001372 PyObject *str;
1373 PyObject *list = PyList_New(0);
1374
1375 if (list == NULL)
1376 return NULL;
1377
1378 for (i = j = 0; i < len; ) {
1379 if (s[i] == ch) {
1380 if (maxcount-- <= 0)
1381 break;
1382 SPLIT_APPEND(s, j, i);
1383 i = j = i + 1;
1384 } else
1385 i++;
1386 }
1387 if (j <= len) {
1388 SPLIT_APPEND(s, j, len);
1389 }
1390 return list;
1391
1392 onError:
1393 Py_DECREF(list);
1394 return NULL;
1395}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001397PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398"S.split([sep [,maxsplit]]) -> list of strings\n\
1399\n\
1400Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001401delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001402splits are done. If sep is not specified or is None, any\n\
1403whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404
1405static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001406string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001408 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1409 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001410 Py_ssize_t maxsplit = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001411 const char *s = PyString_AS_STRING(self), *sub;
1412 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413
Martin v. Löwis9c830762006-04-13 08:37:17 +00001414 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001416 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001417 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001418 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001420 if (PyString_Check(subobj)) {
1421 sub = PyString_AS_STRING(subobj);
1422 n = PyString_GET_SIZE(subobj);
1423 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001424#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001425 else if (PyUnicode_Check(subobj))
1426 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001427#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001428 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1429 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431 if (n == 0) {
1432 PyErr_SetString(PyExc_ValueError, "empty separator");
1433 return NULL;
1434 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001435 else if (n == 1)
1436 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437
1438 list = PyList_New(0);
1439 if (list == NULL)
1440 return NULL;
1441
1442 i = j = 0;
1443 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001444 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001445 if (maxsplit-- <= 0)
1446 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001447 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448 if (item == NULL)
1449 goto fail;
1450 err = PyList_Append(list, item);
1451 Py_DECREF(item);
1452 if (err < 0)
1453 goto fail;
1454 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455 }
1456 else
1457 i++;
1458 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001459 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 if (item == NULL)
1461 goto fail;
1462 err = PyList_Append(list, item);
1463 Py_DECREF(item);
1464 if (err < 0)
1465 goto fail;
1466
1467 return list;
1468
1469 fail:
1470 Py_DECREF(list);
1471 return NULL;
1472}
1473
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001474static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001475rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001476{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001477 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001478 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001479 PyObject *list = PyList_New(0);
1480
1481 if (list == NULL)
1482 return NULL;
1483
1484 for (i = j = len - 1; i >= 0; ) {
1485 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1486 i--;
1487 j = i;
1488 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1489 i--;
1490 if (j > i) {
1491 if (maxsplit-- <= 0)
1492 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001493 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001494 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1495 i--;
1496 j = i;
1497 }
1498 }
1499 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001500 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001501 }
1502 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001503 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001504 Py_DECREF(list);
1505 return NULL;
1506}
1507
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001508static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001509rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001511 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001512 PyObject *str;
1513 PyObject *list = PyList_New(0);
1514
1515 if (list == NULL)
1516 return NULL;
1517
1518 for (i = j = len - 1; i >= 0; ) {
1519 if (s[i] == ch) {
1520 if (maxcount-- <= 0)
1521 break;
1522 SPLIT_INSERT(s, i + 1, j + 1);
1523 j = i = i - 1;
1524 } else
1525 i--;
1526 }
1527 if (j >= -1) {
1528 SPLIT_INSERT(s, 0, j + 1);
1529 }
1530 return list;
1531
1532 onError:
1533 Py_DECREF(list);
1534 return NULL;
1535}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001536
1537PyDoc_STRVAR(rsplit__doc__,
1538"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1539\n\
1540Return a list of the words in the string S, using sep as the\n\
1541delimiter string, starting at the end of the string and working\n\
1542to the front. If maxsplit is given, at most maxsplit splits are\n\
1543done. If sep is not specified or is None, any whitespace string\n\
1544is a separator.");
1545
1546static PyObject *
1547string_rsplit(PyStringObject *self, PyObject *args)
1548{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001549 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1550 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001551 Py_ssize_t maxsplit = -1;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001552 const char *s = PyString_AS_STRING(self), *sub;
1553 PyObject *list, *item, *subobj = Py_None;
1554
Martin v. Löwis9c830762006-04-13 08:37:17 +00001555 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001556 return NULL;
1557 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001558 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001559 if (subobj == Py_None)
1560 return rsplit_whitespace(s, len, maxsplit);
1561 if (PyString_Check(subobj)) {
1562 sub = PyString_AS_STRING(subobj);
1563 n = PyString_GET_SIZE(subobj);
1564 }
1565#ifdef Py_USING_UNICODE
1566 else if (PyUnicode_Check(subobj))
1567 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1568#endif
1569 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1570 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001571
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001572 if (n == 0) {
1573 PyErr_SetString(PyExc_ValueError, "empty separator");
1574 return NULL;
1575 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001576 else if (n == 1)
1577 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001578
1579 list = PyList_New(0);
1580 if (list == NULL)
1581 return NULL;
1582
1583 j = len;
1584 i = j - n;
1585 while (i >= 0) {
1586 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1587 if (maxsplit-- <= 0)
1588 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001589 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001590 if (item == NULL)
1591 goto fail;
1592 err = PyList_Insert(list, 0, item);
1593 Py_DECREF(item);
1594 if (err < 0)
1595 goto fail;
1596 j = i;
1597 i -= n;
1598 }
1599 else
1600 i--;
1601 }
1602 item = PyString_FromStringAndSize(s, j);
1603 if (item == NULL)
1604 goto fail;
1605 err = PyList_Insert(list, 0, item);
1606 Py_DECREF(item);
1607 if (err < 0)
1608 goto fail;
1609
1610 return list;
1611
1612 fail:
1613 Py_DECREF(list);
1614 return NULL;
1615}
1616
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001618PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619"S.join(sequence) -> string\n\
1620\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001622sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623
1624static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001625string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626{
1627 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001628 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001631 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001632 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001633 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001634 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635
Tim Peters19fe14e2001-01-19 03:03:47 +00001636 seq = PySequence_Fast(orig, "");
1637 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 return NULL;
1639 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001640
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001641 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001642 if (seqlen == 0) {
1643 Py_DECREF(seq);
1644 return PyString_FromString("");
1645 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001647 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001648 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1649 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001650 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001651 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001652 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001654
Raymond Hettinger674f2412004-08-23 23:23:54 +00001655 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001656 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001657 * Do a pre-pass to figure out the total amount of space we'll
1658 * need (sz), see whether any argument is absurd, and defer to
1659 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001660 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001661 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001663 item = PySequence_Fast_GET_ITEM(seq, i);
1664 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001665#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001666 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001667 /* Defer to Unicode join.
1668 * CAUTION: There's no gurantee that the
1669 * original sequence can be iterated over
1670 * again, so we must pass seq here.
1671 */
1672 PyObject *result;
1673 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001674 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001675 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001676 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001677#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001678 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001679 "sequence item %i: expected string,"
1680 " %.80s found",
Martin v. Löwis18e16552006-02-15 17:27:45 +00001681 /*XXX*/(int)i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001682 Py_DECREF(seq);
1683 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001684 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001685 sz += PyString_GET_SIZE(item);
1686 if (i != 0)
1687 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001688 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001689 PyErr_SetString(PyExc_OverflowError,
1690 "join() is too long for a Python string");
1691 Py_DECREF(seq);
1692 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001693 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001694 }
1695
1696 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001697 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001698 if (res == NULL) {
1699 Py_DECREF(seq);
1700 return NULL;
1701 }
1702
1703 /* Catenate everything. */
1704 p = PyString_AS_STRING(res);
1705 for (i = 0; i < seqlen; ++i) {
1706 size_t n;
1707 item = PySequence_Fast_GET_ITEM(seq, i);
1708 n = PyString_GET_SIZE(item);
1709 memcpy(p, PyString_AS_STRING(item), n);
1710 p += n;
1711 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001712 memcpy(p, sep, seplen);
1713 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001714 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001716
Jeremy Hylton49048292000-07-11 03:28:17 +00001717 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719}
1720
Tim Peters52e155e2001-06-16 05:42:57 +00001721PyObject *
1722_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001723{
Tim Petersa7259592001-06-16 05:11:17 +00001724 assert(sep != NULL && PyString_Check(sep));
1725 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001726 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001727}
1728
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001729static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001730string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001731{
1732 if (*end > len)
1733 *end = len;
1734 else if (*end < 0)
1735 *end += len;
1736 if (*end < 0)
1737 *end = 0;
1738 if (*start < 0)
1739 *start += len;
1740 if (*start < 0)
1741 *start = 0;
1742}
1743
Martin v. Löwis18e16552006-02-15 17:27:45 +00001744static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001745string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001747 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001748 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001749 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751
Martin v. Löwis18e16552006-02-15 17:27:45 +00001752 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001753 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001754 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 return -2;
1756 if (PyString_Check(subobj)) {
1757 sub = PyString_AS_STRING(subobj);
1758 n = PyString_GET_SIZE(subobj);
1759 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001760#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001761 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001762 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001763#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001764 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 return -2;
1766
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001767 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768
Guido van Rossum4c08d552000-03-10 22:55:18 +00001769 if (dir > 0) {
1770 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001772 last -= n;
1773 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001774 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001775 return (long)i;
1776 }
1777 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001778 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001779
Guido van Rossum4c08d552000-03-10 22:55:18 +00001780 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001781 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001782 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001783 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001784 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001785 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001786
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787 return -1;
1788}
1789
1790
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001791PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792"S.find(sub [,start [,end]]) -> int\n\
1793\n\
1794Return the lowest index in S where substring sub is found,\n\
1795such that sub is contained within s[start,end]. Optional\n\
1796arguments start and end are interpreted as in slice notation.\n\
1797\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001798Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799
1800static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001801string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001803 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 if (result == -2)
1805 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001806 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807}
1808
1809
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001810PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811"S.index(sub [,start [,end]]) -> int\n\
1812\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001813Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814
1815static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001816string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001818 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 if (result == -2)
1820 return NULL;
1821 if (result == -1) {
1822 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001823 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824 return NULL;
1825 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001826 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827}
1828
1829
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001830PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831"S.rfind(sub [,start [,end]]) -> int\n\
1832\n\
1833Return the highest index in S where substring sub is found,\n\
1834such that sub is contained within s[start,end]. Optional\n\
1835arguments start and end are interpreted as in slice notation.\n\
1836\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001837Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838
1839static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001840string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 if (result == -2)
1844 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001845 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846}
1847
1848
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001849PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850"S.rindex(sub [,start [,end]]) -> int\n\
1851\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001852Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853
1854static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001855string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001857 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858 if (result == -2)
1859 return NULL;
1860 if (result == -1) {
1861 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001862 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863 return NULL;
1864 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001865 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866}
1867
1868
1869static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001870do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1871{
1872 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001873 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001874 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001875 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1876 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001877
1878 i = 0;
1879 if (striptype != RIGHTSTRIP) {
1880 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1881 i++;
1882 }
1883 }
1884
1885 j = len;
1886 if (striptype != LEFTSTRIP) {
1887 do {
1888 j--;
1889 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1890 j++;
1891 }
1892
1893 if (i == 0 && j == len && PyString_CheckExact(self)) {
1894 Py_INCREF(self);
1895 return (PyObject*)self;
1896 }
1897 else
1898 return PyString_FromStringAndSize(s+i, j-i);
1899}
1900
1901
1902static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001903do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904{
1905 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001906 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 i = 0;
1909 if (striptype != RIGHTSTRIP) {
1910 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1911 i++;
1912 }
1913 }
1914
1915 j = len;
1916 if (striptype != LEFTSTRIP) {
1917 do {
1918 j--;
1919 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1920 j++;
1921 }
1922
Tim Peters8fa5dd02001-09-12 02:18:30 +00001923 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 Py_INCREF(self);
1925 return (PyObject*)self;
1926 }
1927 else
1928 return PyString_FromStringAndSize(s+i, j-i);
1929}
1930
1931
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001932static PyObject *
1933do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1934{
1935 PyObject *sep = NULL;
1936
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001937 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001938 return NULL;
1939
1940 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001941 if (PyString_Check(sep))
1942 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001943#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001944 else if (PyUnicode_Check(sep)) {
1945 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1946 PyObject *res;
1947 if (uniself==NULL)
1948 return NULL;
1949 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1950 striptype, sep);
1951 Py_DECREF(uniself);
1952 return res;
1953 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001954#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00001955 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001956#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00001957 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001958#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00001959 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001960#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00001961 STRIPNAME(striptype));
1962 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001963 }
1964
1965 return do_strip(self, striptype);
1966}
1967
1968
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001969PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001970"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971\n\
1972Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001973whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001974If chars is given and not None, remove characters in chars instead.\n\
1975If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976
1977static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001978string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001980 if (PyTuple_GET_SIZE(args) == 0)
1981 return do_strip(self, BOTHSTRIP); /* Common case */
1982 else
1983 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984}
1985
1986
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001987PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001988"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001990Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001991If chars is given and not None, remove characters in chars instead.\n\
1992If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993
1994static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001995string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001997 if (PyTuple_GET_SIZE(args) == 0)
1998 return do_strip(self, LEFTSTRIP); /* Common case */
1999 else
2000 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001}
2002
2003
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002004PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002005"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002007Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002008If chars is given and not None, remove characters in chars instead.\n\
2009If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002010
2011static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002012string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002014 if (PyTuple_GET_SIZE(args) == 0)
2015 return do_strip(self, RIGHTSTRIP); /* Common case */
2016 else
2017 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018}
2019
2020
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002021PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022"S.lower() -> string\n\
2023\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002024Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025
2026static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002027string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028{
2029 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002030 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002031 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032
Anthony Baxtera6286212006-04-11 07:42:36 +00002033 newobj = PyString_FromStringAndSize(NULL, n);
2034 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002036 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037 for (i = 0; i < n; i++) {
2038 int c = Py_CHARMASK(*s++);
2039 if (isupper(c)) {
2040 *s_new = tolower(c);
2041 } else
2042 *s_new = c;
2043 s_new++;
2044 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002045 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046}
2047
2048
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002049PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002050"S.upper() -> string\n\
2051\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002052Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053
2054static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002055string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002056{
2057 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002058 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002059 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060
Anthony Baxtera6286212006-04-11 07:42:36 +00002061 newobj = PyString_FromStringAndSize(NULL, n);
2062 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002064 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002065 for (i = 0; i < n; i++) {
2066 int c = Py_CHARMASK(*s++);
2067 if (islower(c)) {
2068 *s_new = toupper(c);
2069 } else
2070 *s_new = c;
2071 s_new++;
2072 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002073 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002074}
2075
2076
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002077PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078"S.title() -> string\n\
2079\n\
2080Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002081characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002082
2083static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002084string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085{
2086 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002087 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002089 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002090
Anthony Baxtera6286212006-04-11 07:42:36 +00002091 newobj = PyString_FromStringAndSize(NULL, n);
2092 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002093 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002094 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002095 for (i = 0; i < n; i++) {
2096 int c = Py_CHARMASK(*s++);
2097 if (islower(c)) {
2098 if (!previous_is_cased)
2099 c = toupper(c);
2100 previous_is_cased = 1;
2101 } else if (isupper(c)) {
2102 if (previous_is_cased)
2103 c = tolower(c);
2104 previous_is_cased = 1;
2105 } else
2106 previous_is_cased = 0;
2107 *s_new++ = c;
2108 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002109 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002110}
2111
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002112PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002113"S.capitalize() -> string\n\
2114\n\
2115Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002116capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002117
2118static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002119string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120{
2121 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002122 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002123 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124
Anthony Baxtera6286212006-04-11 07:42:36 +00002125 newobj = PyString_FromStringAndSize(NULL, n);
2126 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002128 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129 if (0 < n) {
2130 int c = Py_CHARMASK(*s++);
2131 if (islower(c))
2132 *s_new = toupper(c);
2133 else
2134 *s_new = c;
2135 s_new++;
2136 }
2137 for (i = 1; i < n; i++) {
2138 int c = Py_CHARMASK(*s++);
2139 if (isupper(c))
2140 *s_new = tolower(c);
2141 else
2142 *s_new = c;
2143 s_new++;
2144 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002145 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146}
2147
2148
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002149PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150"S.count(sub[, start[, end]]) -> int\n\
2151\n\
2152Return the number of occurrences of substring sub in string\n\
2153S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002154interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155
2156static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002157string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002159 const char *s = PyString_AS_STRING(self), *sub, *t;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002160 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002161 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002162 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002163 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164
Guido van Rossumc6821402000-05-08 14:08:05 +00002165 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2166 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002168
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169 if (PyString_Check(subobj)) {
2170 sub = PyString_AS_STRING(subobj);
2171 n = PyString_GET_SIZE(subobj);
2172 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002173#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002174 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002175 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002176 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2177 if (count == -1)
2178 return NULL;
2179 else
2180 return PyInt_FromLong((long) count);
2181 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002182#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2184 return NULL;
2185
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002186 string_adjust_indices(&i, &last, len);
2187
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188 m = last + 1 - n;
2189 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002190 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191
2192 r = 0;
2193 while (i < m) {
2194 if (!memcmp(s+i, sub, n)) {
2195 r++;
2196 i += n;
2197 } else {
2198 i++;
2199 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002200 if (i >= m)
2201 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002202 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002203 if (t == NULL)
2204 break;
2205 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002207 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208}
2209
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002210PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211"S.swapcase() -> string\n\
2212\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002214converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215
2216static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002217string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218{
2219 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002220 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002221 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222
Anthony Baxtera6286212006-04-11 07:42:36 +00002223 newobj = PyString_FromStringAndSize(NULL, n);
2224 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002226 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 for (i = 0; i < n; i++) {
2228 int c = Py_CHARMASK(*s++);
2229 if (islower(c)) {
2230 *s_new = toupper(c);
2231 }
2232 else if (isupper(c)) {
2233 *s_new = tolower(c);
2234 }
2235 else
2236 *s_new = c;
2237 s_new++;
2238 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002239 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240}
2241
2242
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002243PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244"S.translate(table [,deletechars]) -> string\n\
2245\n\
2246Return a copy of the string S, where all characters occurring\n\
2247in the optional argument deletechars are removed, and the\n\
2248remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002249translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250
2251static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002252string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254 register char *input, *output;
2255 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002256 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002258 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002259 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260 PyObject *result;
2261 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002264 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002266 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002267
2268 if (PyString_Check(tableobj)) {
2269 table1 = PyString_AS_STRING(tableobj);
2270 tablen = PyString_GET_SIZE(tableobj);
2271 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002272#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002274 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002275 parameter; instead a mapping to None will cause characters
2276 to be deleted. */
2277 if (delobj != NULL) {
2278 PyErr_SetString(PyExc_TypeError,
2279 "deletions are implemented differently for unicode");
2280 return NULL;
2281 }
2282 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2283 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002284#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002285 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287
Martin v. Löwis00b61272002-12-12 20:03:19 +00002288 if (tablen != 256) {
2289 PyErr_SetString(PyExc_ValueError,
2290 "translation table must be 256 characters long");
2291 return NULL;
2292 }
2293
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294 if (delobj != NULL) {
2295 if (PyString_Check(delobj)) {
2296 del_table = PyString_AS_STRING(delobj);
2297 dellen = PyString_GET_SIZE(delobj);
2298 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002299#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300 else if (PyUnicode_Check(delobj)) {
2301 PyErr_SetString(PyExc_TypeError,
2302 "deletions are implemented differently for unicode");
2303 return NULL;
2304 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002305#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2307 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 }
2309 else {
2310 del_table = NULL;
2311 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312 }
2313
2314 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002315 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316 result = PyString_FromStringAndSize((char *)NULL, inlen);
2317 if (result == NULL)
2318 return NULL;
2319 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002320 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321
2322 if (dellen == 0) {
2323 /* If no deletions are required, use faster code */
2324 for (i = inlen; --i >= 0; ) {
2325 c = Py_CHARMASK(*input++);
2326 if (Py_CHARMASK((*output++ = table[c])) != c)
2327 changed = 1;
2328 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002329 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002330 return result;
2331 Py_DECREF(result);
2332 Py_INCREF(input_obj);
2333 return input_obj;
2334 }
2335
2336 for (i = 0; i < 256; i++)
2337 trans_table[i] = Py_CHARMASK(table[i]);
2338
2339 for (i = 0; i < dellen; i++)
2340 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2341
2342 for (i = inlen; --i >= 0; ) {
2343 c = Py_CHARMASK(*input++);
2344 if (trans_table[c] != -1)
2345 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2346 continue;
2347 changed = 1;
2348 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002349 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350 Py_DECREF(result);
2351 Py_INCREF(input_obj);
2352 return input_obj;
2353 }
2354 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002355 if (inlen > 0)
2356 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357 return result;
2358}
2359
2360
2361/* What follows is used for implementing replace(). Perry Stoll. */
2362
2363/*
2364 mymemfind
2365
2366 strstr replacement for arbitrary blocks of memory.
2367
Barry Warsaw51ac5802000-03-20 16:36:48 +00002368 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369 contents of memory pointed to by PAT. Returns the index into MEM if
2370 found, or -1 if not found. If len of PAT is greater than length of
2371 MEM, the function returns -1.
2372*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00002373static Py_ssize_t
2374mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002375{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002376 register Py_ssize_t ii;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377
2378 /* pattern can not occur in the last pat_len-1 chars */
2379 len -= pat_len;
2380
2381 for (ii = 0; ii <= len; ii++) {
Fred Drake396f6e02000-06-20 15:47:54 +00002382 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383 return ii;
2384 }
2385 }
2386 return -1;
2387}
2388
2389/*
2390 mymemcnt
2391
2392 Return the number of distinct times PAT is found in MEM.
2393 meaning mem=1111 and pat==11 returns 2.
2394 mem=11111 and pat==11 also return 2.
2395 */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002396static Py_ssize_t
2397mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002399 register Py_ssize_t offset = 0;
2400 Py_ssize_t nfound = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401
2402 while (len >= 0) {
2403 offset = mymemfind(mem, len, pat, pat_len);
2404 if (offset == -1)
2405 break;
2406 mem += offset + pat_len;
2407 len -= offset + pat_len;
2408 nfound++;
2409 }
2410 return nfound;
2411}
2412
2413/*
2414 mymemreplace
2415
Thomas Wouters7e474022000-07-16 12:04:32 +00002416 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002417 replaced with SUB.
2418
Thomas Wouters7e474022000-07-16 12:04:32 +00002419 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002420 of PAT in STR, then the original string is returned. Otherwise, a new
2421 string is allocated here and returned.
2422
2423 on return, out_len is:
2424 the length of output string, or
2425 -1 if the input string is returned, or
2426 unchanged if an error occurs (no memory).
2427
2428 return value is:
2429 the new string allocated locally, or
2430 NULL if an error occurred.
2431*/
2432static char *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002433mymemreplace(const char *str, Py_ssize_t len, /* input string */
2434 const char *pat, Py_ssize_t pat_len, /* pattern string to find */
2435 const char *sub, Py_ssize_t sub_len, /* substitution string */
2436 Py_ssize_t count, /* number of replacements */
2437 Py_ssize_t *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438{
2439 char *out_s;
2440 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002441 Py_ssize_t nfound, offset, new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002442
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002443 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444 goto return_same;
2445
2446 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002447 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002448 if (count < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002449 count = PY_SSIZE_T_MAX;
Tim Peters9c012af2001-05-10 00:32:57 +00002450 else if (nfound > count)
2451 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002452 if (nfound == 0)
2453 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002454
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002456 if (new_len == 0) {
2457 /* Have to allocate something for the caller to free(). */
2458 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002459 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002460 return NULL;
2461 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002463 else {
2464 assert(new_len > 0);
2465 new_s = (char *)PyMem_MALLOC(new_len);
2466 if (new_s == NULL)
2467 return NULL;
2468 out_s = new_s;
2469
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002470 if (pat_len > 0) {
2471 for (; nfound > 0; --nfound) {
2472 /* find index of next instance of pattern */
2473 offset = mymemfind(str, len, pat, pat_len);
2474 if (offset == -1)
2475 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002476
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002477 /* copy non matching part of input string */
2478 memcpy(new_s, str, offset);
2479 str += offset + pat_len;
2480 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002481
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002482 /* copy substitute into the output string */
2483 new_s += offset;
2484 memcpy(new_s, sub, sub_len);
2485 new_s += sub_len;
2486 }
2487 /* copy any remaining values into output string */
2488 if (len > 0)
2489 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002490 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002491 else {
2492 for (;;++str, --len) {
2493 memcpy(new_s, sub, sub_len);
2494 new_s += sub_len;
2495 if (--nfound <= 0) {
2496 memcpy(new_s, str, len);
2497 break;
2498 }
2499 *new_s++ = *str;
2500 }
2501 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002502 }
2503 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002504 return out_s;
2505
2506 return_same:
2507 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002508 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002509}
2510
2511
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002512PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002513"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002514\n\
2515Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002516old replaced by new. If the optional argument count is\n\
2517given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002518
2519static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002520string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522 const char *str = PyString_AS_STRING(self), *sub, *repl;
2523 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002524 const Py_ssize_t len = PyString_GET_SIZE(self);
2525 Py_ssize_t sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 int count = -1;
Anthony Baxtera6286212006-04-11 07:42:36 +00002527 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 if (!PyArg_ParseTuple(args, "OO|i:replace",
2531 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002532 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533
2534 if (PyString_Check(subobj)) {
2535 sub = PyString_AS_STRING(subobj);
2536 sub_len = PyString_GET_SIZE(subobj);
2537 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002538#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002539 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002540 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002541 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002542#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002543 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2544 return NULL;
2545
2546 if (PyString_Check(replobj)) {
2547 repl = PyString_AS_STRING(replobj);
2548 repl_len = PyString_GET_SIZE(replobj);
2549 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002550#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002551 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002552 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002553 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002554#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002555 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2556 return NULL;
2557
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002559 if (new_s == NULL) {
2560 PyErr_NoMemory();
2561 return NULL;
2562 }
2563 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002564 if (PyString_CheckExact(self)) {
2565 /* we're returning another reference to self */
Anthony Baxtera6286212006-04-11 07:42:36 +00002566 newobj = (PyObject*)self;
2567 Py_INCREF(newobj);
Tim Peters8fa5dd02001-09-12 02:18:30 +00002568 }
2569 else {
Anthony Baxtera6286212006-04-11 07:42:36 +00002570 newobj = PyString_FromStringAndSize(str, len);
2571 if (newobj == NULL)
Tim Peters8fa5dd02001-09-12 02:18:30 +00002572 return NULL;
2573 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002574 }
2575 else {
Anthony Baxtera6286212006-04-11 07:42:36 +00002576 newobj = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002577 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002578 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002579 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002580}
2581
2582
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002583PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002584"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002585\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002586Return True if S starts with the specified prefix, False otherwise.\n\
2587With optional start, test S beginning at that position.\n\
2588With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002589
2590static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002591string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002592{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002594 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002595 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002596 Py_ssize_t plen;
2597 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002598 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002600
Guido van Rossumc6821402000-05-08 14:08:05 +00002601 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2602 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002603 return NULL;
2604 if (PyString_Check(subobj)) {
2605 prefix = PyString_AS_STRING(subobj);
2606 plen = PyString_GET_SIZE(subobj);
2607 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002608#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002609 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002610 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002611 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002612 subobj, start, end, -1);
2613 if (rc == -1)
2614 return NULL;
2615 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002616 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002617 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002618#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002619 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002620 return NULL;
2621
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002622 string_adjust_indices(&start, &end, len);
2623
2624 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002625 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002626
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002627 if (end-start >= plen)
2628 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2629 else
2630 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002631}
2632
2633
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002634PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002635"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002636\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002637Return True if S ends with the specified suffix, False otherwise.\n\
2638With optional start, test S beginning at that position.\n\
2639With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002640
2641static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002642string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002643{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002644 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002645 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002647 Py_ssize_t slen;
2648 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002649 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002650 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651
Guido van Rossumc6821402000-05-08 14:08:05 +00002652 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2653 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002654 return NULL;
2655 if (PyString_Check(subobj)) {
2656 suffix = PyString_AS_STRING(subobj);
2657 slen = PyString_GET_SIZE(subobj);
2658 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002659#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002660 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002661 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002662 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002663 subobj, start, end, +1);
2664 if (rc == -1)
2665 return NULL;
2666 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002667 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002668 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002669#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002670 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002671 return NULL;
2672
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002673 string_adjust_indices(&start, &end, len);
2674
2675 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002676 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002677
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002678 if (end-slen > start)
2679 start = end - slen;
2680 if (end-start >= slen)
2681 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2682 else
2683 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002684}
2685
2686
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002687PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002688"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002689\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002690Encodes S using the codec registered for encoding. encoding defaults\n\
2691to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002692handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002693a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2694'xmlcharrefreplace' as well as any other name registered with\n\
2695codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002696
2697static PyObject *
2698string_encode(PyStringObject *self, PyObject *args)
2699{
2700 char *encoding = NULL;
2701 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002702 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00002703
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002704 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2705 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002706 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002707 if (v == NULL)
2708 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002709 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2710 PyErr_Format(PyExc_TypeError,
2711 "encoder did not return a string/unicode object "
2712 "(type=%.400s)",
2713 v->ob_type->tp_name);
2714 Py_DECREF(v);
2715 return NULL;
2716 }
2717 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002718
2719 onError:
2720 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002721}
2722
2723
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002724PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002725"S.decode([encoding[,errors]]) -> object\n\
2726\n\
2727Decodes S using the codec registered for encoding. encoding defaults\n\
2728to the default encoding. errors may be given to set a different error\n\
2729handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002730a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2731as well as any other name registerd with codecs.register_error that is\n\
2732able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002733
2734static PyObject *
2735string_decode(PyStringObject *self, PyObject *args)
2736{
2737 char *encoding = NULL;
2738 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002739 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00002740
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002741 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2742 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002743 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002744 if (v == NULL)
2745 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002746 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2747 PyErr_Format(PyExc_TypeError,
2748 "decoder did not return a string/unicode object "
2749 "(type=%.400s)",
2750 v->ob_type->tp_name);
2751 Py_DECREF(v);
2752 return NULL;
2753 }
2754 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002755
2756 onError:
2757 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002758}
2759
2760
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002761PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002762"S.expandtabs([tabsize]) -> string\n\
2763\n\
2764Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002765If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002766
2767static PyObject*
2768string_expandtabs(PyStringObject *self, PyObject *args)
2769{
2770 const char *e, *p;
2771 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002772 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002773 PyObject *u;
2774 int tabsize = 8;
2775
2776 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2777 return NULL;
2778
Thomas Wouters7e474022000-07-16 12:04:32 +00002779 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002780 i = j = 0;
2781 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2782 for (p = PyString_AS_STRING(self); p < e; p++)
2783 if (*p == '\t') {
2784 if (tabsize > 0)
2785 j += tabsize - (j % tabsize);
2786 }
2787 else {
2788 j++;
2789 if (*p == '\n' || *p == '\r') {
2790 i += j;
2791 j = 0;
2792 }
2793 }
2794
2795 /* Second pass: create output string and fill it */
2796 u = PyString_FromStringAndSize(NULL, i + j);
2797 if (!u)
2798 return NULL;
2799
2800 j = 0;
2801 q = PyString_AS_STRING(u);
2802
2803 for (p = PyString_AS_STRING(self); p < e; p++)
2804 if (*p == '\t') {
2805 if (tabsize > 0) {
2806 i = tabsize - (j % tabsize);
2807 j += i;
2808 while (i--)
2809 *q++ = ' ';
2810 }
2811 }
2812 else {
2813 j++;
2814 *q++ = *p;
2815 if (*p == '\n' || *p == '\r')
2816 j = 0;
2817 }
2818
2819 return u;
2820}
2821
Tim Peters8fa5dd02001-09-12 02:18:30 +00002822static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002823pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002824{
2825 PyObject *u;
2826
2827 if (left < 0)
2828 left = 0;
2829 if (right < 0)
2830 right = 0;
2831
Tim Peters8fa5dd02001-09-12 02:18:30 +00002832 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002833 Py_INCREF(self);
2834 return (PyObject *)self;
2835 }
2836
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002837 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002838 left + PyString_GET_SIZE(self) + right);
2839 if (u) {
2840 if (left)
2841 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002842 memcpy(PyString_AS_STRING(u) + left,
2843 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002844 PyString_GET_SIZE(self));
2845 if (right)
2846 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2847 fill, right);
2848 }
2849
2850 return u;
2851}
2852
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002853PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002854"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002855"\n"
2856"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002857"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002858
2859static PyObject *
2860string_ljust(PyStringObject *self, PyObject *args)
2861{
2862 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002863 char fillchar = ' ';
2864
2865 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002866 return NULL;
2867
Tim Peters8fa5dd02001-09-12 02:18:30 +00002868 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869 Py_INCREF(self);
2870 return (PyObject*) self;
2871 }
2872
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002873 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002874}
2875
2876
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002877PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002878"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002879"\n"
2880"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002881"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002882
2883static PyObject *
2884string_rjust(PyStringObject *self, PyObject *args)
2885{
2886 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002887 char fillchar = ' ';
2888
2889 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002890 return NULL;
2891
Tim Peters8fa5dd02001-09-12 02:18:30 +00002892 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893 Py_INCREF(self);
2894 return (PyObject*) self;
2895 }
2896
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002897 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002898}
2899
2900
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002901PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002902"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002903"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002904"Return S centered in a string of length width. Padding is\n"
2905"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002906
2907static PyObject *
2908string_center(PyStringObject *self, PyObject *args)
2909{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002910 Py_ssize_t marg, left;
2911 long width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002912 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002913
Martin v. Löwis18e16552006-02-15 17:27:45 +00002914 if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002915 return NULL;
2916
Tim Peters8fa5dd02001-09-12 02:18:30 +00002917 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002918 Py_INCREF(self);
2919 return (PyObject*) self;
2920 }
2921
2922 marg = width - PyString_GET_SIZE(self);
2923 left = marg / 2 + (marg & width & 1);
2924
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002925 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002926}
2927
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002928PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002929"S.zfill(width) -> string\n"
2930"\n"
2931"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002932"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002933
2934static PyObject *
2935string_zfill(PyStringObject *self, PyObject *args)
2936{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002937 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00002938 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002939 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002940
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002941 long width;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002942 if (!PyArg_ParseTuple(args, "l:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00002943 return NULL;
2944
2945 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002946 if (PyString_CheckExact(self)) {
2947 Py_INCREF(self);
2948 return (PyObject*) self;
2949 }
2950 else
2951 return PyString_FromStringAndSize(
2952 PyString_AS_STRING(self),
2953 PyString_GET_SIZE(self)
2954 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002955 }
2956
2957 fill = width - PyString_GET_SIZE(self);
2958
2959 s = pad(self, fill, 0, '0');
2960
2961 if (s == NULL)
2962 return NULL;
2963
2964 p = PyString_AS_STRING(s);
2965 if (p[fill] == '+' || p[fill] == '-') {
2966 /* move sign to beginning of string */
2967 p[0] = p[fill];
2968 p[fill] = '0';
2969 }
2970
2971 return (PyObject*) s;
2972}
2973
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002974PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002975"S.isspace() -> bool\n\
2976\n\
2977Return True if all characters in S are whitespace\n\
2978and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002979
2980static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002981string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002982{
Fred Drakeba096332000-07-09 07:04:36 +00002983 register const unsigned char *p
2984 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002985 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002986
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987 /* Shortcut for single character strings */
2988 if (PyString_GET_SIZE(self) == 1 &&
2989 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002990 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002991
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002992 /* Special case for empty strings */
2993 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002994 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002995
Guido van Rossum4c08d552000-03-10 22:55:18 +00002996 e = p + PyString_GET_SIZE(self);
2997 for (; p < e; p++) {
2998 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002999 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003000 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003001 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003002}
3003
3004
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003005PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003006"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003007\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003008Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003009and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003010
3011static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003012string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003013{
Fred Drakeba096332000-07-09 07:04:36 +00003014 register const unsigned char *p
3015 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003016 register const unsigned char *e;
3017
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003018 /* Shortcut for single character strings */
3019 if (PyString_GET_SIZE(self) == 1 &&
3020 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003021 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003022
3023 /* Special case for empty strings */
3024 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003025 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003026
3027 e = p + PyString_GET_SIZE(self);
3028 for (; p < e; p++) {
3029 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003030 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003031 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003032 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003033}
3034
3035
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003036PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003037"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003038\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003039Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003040and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003041
3042static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003043string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003044{
Fred Drakeba096332000-07-09 07:04:36 +00003045 register const unsigned char *p
3046 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003047 register const unsigned char *e;
3048
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003049 /* Shortcut for single character strings */
3050 if (PyString_GET_SIZE(self) == 1 &&
3051 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003052 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003053
3054 /* Special case for empty strings */
3055 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003056 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003057
3058 e = p + PyString_GET_SIZE(self);
3059 for (; p < e; p++) {
3060 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003061 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003062 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003063 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003064}
3065
3066
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003067PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003068"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003070Return True if all characters in S are digits\n\
3071and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003072
3073static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003074string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075{
Fred Drakeba096332000-07-09 07:04:36 +00003076 register const unsigned char *p
3077 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003078 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003079
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080 /* Shortcut for single character strings */
3081 if (PyString_GET_SIZE(self) == 1 &&
3082 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003083 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003085 /* Special case for empty strings */
3086 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003087 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003088
Guido van Rossum4c08d552000-03-10 22:55:18 +00003089 e = p + PyString_GET_SIZE(self);
3090 for (; p < e; p++) {
3091 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003092 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003093 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003094 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003095}
3096
3097
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003098PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003099"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003100\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003101Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003102at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003103
3104static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003105string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003106{
Fred Drakeba096332000-07-09 07:04:36 +00003107 register const unsigned char *p
3108 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003109 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003110 int cased;
3111
Guido van Rossum4c08d552000-03-10 22:55:18 +00003112 /* Shortcut for single character strings */
3113 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003114 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003115
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003116 /* Special case for empty strings */
3117 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003118 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003119
Guido van Rossum4c08d552000-03-10 22:55:18 +00003120 e = p + PyString_GET_SIZE(self);
3121 cased = 0;
3122 for (; p < e; p++) {
3123 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003124 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003125 else if (!cased && islower(*p))
3126 cased = 1;
3127 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003128 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003129}
3130
3131
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003132PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003133"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003134\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003135Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003136at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003137
3138static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003139string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140{
Fred Drakeba096332000-07-09 07:04:36 +00003141 register const unsigned char *p
3142 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003143 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003144 int cased;
3145
Guido van Rossum4c08d552000-03-10 22:55:18 +00003146 /* Shortcut for single character strings */
3147 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003148 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003149
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003150 /* Special case for empty strings */
3151 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003152 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003153
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 e = p + PyString_GET_SIZE(self);
3155 cased = 0;
3156 for (; p < e; p++) {
3157 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003158 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159 else if (!cased && isupper(*p))
3160 cased = 1;
3161 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003162 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003163}
3164
3165
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003166PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003167"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003168\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003169Return True if S is a titlecased string and there is at least one\n\
3170character in S, i.e. uppercase characters may only follow uncased\n\
3171characters and lowercase characters only cased ones. Return False\n\
3172otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003173
3174static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003175string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176{
Fred Drakeba096332000-07-09 07:04:36 +00003177 register const unsigned char *p
3178 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003179 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003180 int cased, previous_is_cased;
3181
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182 /* Shortcut for single character strings */
3183 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003184 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003185
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003186 /* Special case for empty strings */
3187 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003188 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003189
Guido van Rossum4c08d552000-03-10 22:55:18 +00003190 e = p + PyString_GET_SIZE(self);
3191 cased = 0;
3192 previous_is_cased = 0;
3193 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003194 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003195
3196 if (isupper(ch)) {
3197 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003198 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003199 previous_is_cased = 1;
3200 cased = 1;
3201 }
3202 else if (islower(ch)) {
3203 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003204 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003205 previous_is_cased = 1;
3206 cased = 1;
3207 }
3208 else
3209 previous_is_cased = 0;
3210 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003211 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003212}
3213
3214
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003215PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003216"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003217\n\
3218Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003219Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003220is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003221
Guido van Rossum4c08d552000-03-10 22:55:18 +00003222static PyObject*
3223string_splitlines(PyStringObject *self, PyObject *args)
3224{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003225 register Py_ssize_t i;
3226 register Py_ssize_t j;
3227 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003228 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003229 PyObject *list;
3230 PyObject *str;
3231 char *data;
3232
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003233 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003234 return NULL;
3235
3236 data = PyString_AS_STRING(self);
3237 len = PyString_GET_SIZE(self);
3238
Guido van Rossum4c08d552000-03-10 22:55:18 +00003239 list = PyList_New(0);
3240 if (!list)
3241 goto onError;
3242
3243 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003244 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003245
Guido van Rossum4c08d552000-03-10 22:55:18 +00003246 /* Find a line and append it */
3247 while (i < len && data[i] != '\n' && data[i] != '\r')
3248 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003249
3250 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003251 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003252 if (i < len) {
3253 if (data[i] == '\r' && i + 1 < len &&
3254 data[i+1] == '\n')
3255 i += 2;
3256 else
3257 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003258 if (keepends)
3259 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003260 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003261 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003262 j = i;
3263 }
3264 if (j < len) {
3265 SPLIT_APPEND(data, j, len);
3266 }
3267
3268 return list;
3269
3270 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003271 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003272 return NULL;
3273}
3274
3275#undef SPLIT_APPEND
3276
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003277static PyObject *
3278string_getnewargs(PyStringObject *v)
3279{
3280 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3281}
3282
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003283
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003284static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003285string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003286 /* Counterparts of the obsolete stropmodule functions; except
3287 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003288 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3289 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003290 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003291 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3292 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003293 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3294 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3295 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3296 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3297 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3298 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3299 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003300 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3301 capitalize__doc__},
3302 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3303 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3304 endswith__doc__},
3305 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3306 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3307 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3308 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3309 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3310 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3311 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3312 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3313 startswith__doc__},
3314 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3315 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3316 swapcase__doc__},
3317 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3318 translate__doc__},
3319 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3320 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3321 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3322 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3323 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3324 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3325 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3326 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3327 expandtabs__doc__},
3328 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3329 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003330 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003331 {NULL, NULL} /* sentinel */
3332};
3333
Jeremy Hylton938ace62002-07-17 16:30:39 +00003334static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003335str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3336
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003337static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003338string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003339{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003340 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003341 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003342
Guido van Rossumae960af2001-08-30 03:11:59 +00003343 if (type != &PyString_Type)
3344 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003345 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3346 return NULL;
3347 if (x == NULL)
3348 return PyString_FromString("");
3349 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003350}
3351
Guido van Rossumae960af2001-08-30 03:11:59 +00003352static PyObject *
3353str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3354{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003355 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003356 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003357
3358 assert(PyType_IsSubtype(type, &PyString_Type));
3359 tmp = string_new(&PyString_Type, args, kwds);
3360 if (tmp == NULL)
3361 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003362 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003363 n = PyString_GET_SIZE(tmp);
3364 pnew = type->tp_alloc(type, n);
3365 if (pnew != NULL) {
3366 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003367 ((PyStringObject *)pnew)->ob_shash =
3368 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003369 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003370 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003371 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003372 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003373}
3374
Guido van Rossumcacfc072002-05-24 19:01:59 +00003375static PyObject *
3376basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3377{
3378 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003379 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003380 return NULL;
3381}
3382
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003383static PyObject *
3384string_mod(PyObject *v, PyObject *w)
3385{
3386 if (!PyString_Check(v)) {
3387 Py_INCREF(Py_NotImplemented);
3388 return Py_NotImplemented;
3389 }
3390 return PyString_Format(v, w);
3391}
3392
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003393PyDoc_STRVAR(basestring_doc,
3394"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003395
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003396static PyNumberMethods string_as_number = {
3397 0, /*nb_add*/
3398 0, /*nb_subtract*/
3399 0, /*nb_multiply*/
3400 0, /*nb_divide*/
3401 string_mod, /*nb_remainder*/
3402};
3403
3404
Guido van Rossumcacfc072002-05-24 19:01:59 +00003405PyTypeObject PyBaseString_Type = {
3406 PyObject_HEAD_INIT(&PyType_Type)
3407 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003408 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003409 0,
3410 0,
3411 0, /* tp_dealloc */
3412 0, /* tp_print */
3413 0, /* tp_getattr */
3414 0, /* tp_setattr */
3415 0, /* tp_compare */
3416 0, /* tp_repr */
3417 0, /* tp_as_number */
3418 0, /* tp_as_sequence */
3419 0, /* tp_as_mapping */
3420 0, /* tp_hash */
3421 0, /* tp_call */
3422 0, /* tp_str */
3423 0, /* tp_getattro */
3424 0, /* tp_setattro */
3425 0, /* tp_as_buffer */
3426 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3427 basestring_doc, /* tp_doc */
3428 0, /* tp_traverse */
3429 0, /* tp_clear */
3430 0, /* tp_richcompare */
3431 0, /* tp_weaklistoffset */
3432 0, /* tp_iter */
3433 0, /* tp_iternext */
3434 0, /* tp_methods */
3435 0, /* tp_members */
3436 0, /* tp_getset */
3437 &PyBaseObject_Type, /* tp_base */
3438 0, /* tp_dict */
3439 0, /* tp_descr_get */
3440 0, /* tp_descr_set */
3441 0, /* tp_dictoffset */
3442 0, /* tp_init */
3443 0, /* tp_alloc */
3444 basestring_new, /* tp_new */
3445 0, /* tp_free */
3446};
3447
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003448PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003449"str(object) -> string\n\
3450\n\
3451Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003452If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003453
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003454PyTypeObject PyString_Type = {
3455 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003456 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003457 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003458 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003459 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00003460 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003461 (printfunc)string_print, /* tp_print */
3462 0, /* tp_getattr */
3463 0, /* tp_setattr */
3464 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00003465 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003466 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003467 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003468 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003469 (hashfunc)string_hash, /* tp_hash */
3470 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00003471 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003472 PyObject_GenericGetAttr, /* tp_getattro */
3473 0, /* tp_setattro */
3474 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00003475 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003476 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003477 string_doc, /* tp_doc */
3478 0, /* tp_traverse */
3479 0, /* tp_clear */
3480 (richcmpfunc)string_richcompare, /* tp_richcompare */
3481 0, /* tp_weaklistoffset */
3482 0, /* tp_iter */
3483 0, /* tp_iternext */
3484 string_methods, /* tp_methods */
3485 0, /* tp_members */
3486 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003487 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003488 0, /* tp_dict */
3489 0, /* tp_descr_get */
3490 0, /* tp_descr_set */
3491 0, /* tp_dictoffset */
3492 0, /* tp_init */
3493 0, /* tp_alloc */
3494 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003495 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003496};
3497
3498void
Fred Drakeba096332000-07-09 07:04:36 +00003499PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003500{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003501 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003502 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003503 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003504 if (w == NULL || !PyString_Check(*pv)) {
3505 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003506 *pv = NULL;
3507 return;
3508 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003509 v = string_concat((PyStringObject *) *pv, w);
3510 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003511 *pv = v;
3512}
3513
Guido van Rossum013142a1994-08-30 08:19:36 +00003514void
Fred Drakeba096332000-07-09 07:04:36 +00003515PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003516{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003517 PyString_Concat(pv, w);
3518 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003519}
3520
3521
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003522/* The following function breaks the notion that strings are immutable:
3523 it changes the size of a string. We get away with this only if there
3524 is only one module referencing the object. You can also think of it
3525 as creating a new string object and destroying the old one, only
3526 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003527 already be known to some other part of the code...
3528 Note that if there's not enough memory to resize the string, the original
3529 string object at *pv is deallocated, *pv is set to NULL, an "out of
3530 memory" exception is set, and -1 is returned. Else (on success) 0 is
3531 returned, and the value in *pv may or may not be the same as on input.
3532 As always, an extra byte is allocated for a trailing \0 byte (newsize
3533 does *not* include that), and a trailing \0 byte is stored.
3534*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003535
3536int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003537_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003538{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003539 register PyObject *v;
3540 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003541 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003542 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3543 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003544 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003545 Py_DECREF(v);
3546 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003547 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003548 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003549 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003550 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003551 _Py_ForgetReference(v);
3552 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003553 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003554 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003555 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003556 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003557 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003558 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003559 _Py_NewReference(*pv);
3560 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003561 sv->ob_size = newsize;
3562 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003563 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003564 return 0;
3565}
Guido van Rossume5372401993-03-16 12:15:04 +00003566
3567/* Helpers for formatstring */
3568
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003569static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00003570getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003571{
Thomas Wouters977485d2006-02-16 15:59:12 +00003572 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003573 if (argidx < arglen) {
3574 (*p_argidx)++;
3575 if (arglen < 0)
3576 return args;
3577 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003578 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003579 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003580 PyErr_SetString(PyExc_TypeError,
3581 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003582 return NULL;
3583}
3584
Tim Peters38fd5b62000-09-21 05:43:11 +00003585/* Format codes
3586 * F_LJUST '-'
3587 * F_SIGN '+'
3588 * F_BLANK ' '
3589 * F_ALT '#'
3590 * F_ZERO '0'
3591 */
Guido van Rossume5372401993-03-16 12:15:04 +00003592#define F_LJUST (1<<0)
3593#define F_SIGN (1<<1)
3594#define F_BLANK (1<<2)
3595#define F_ALT (1<<3)
3596#define F_ZERO (1<<4)
3597
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003598static int
Fred Drakeba096332000-07-09 07:04:36 +00003599formatfloat(char *buf, size_t buflen, int flags,
3600 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003601{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003602 /* fmt = '%#.' + `prec` + `type`
3603 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003604 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003605 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003606 x = PyFloat_AsDouble(v);
3607 if (x == -1.0 && PyErr_Occurred()) {
3608 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003609 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003610 }
Guido van Rossume5372401993-03-16 12:15:04 +00003611 if (prec < 0)
3612 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003613 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3614 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003615 /* Worst case length calc to ensure no buffer overrun:
3616
3617 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003618 fmt = %#.<prec>g
3619 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003620 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003621 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003622
3623 'f' formats:
3624 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3625 len = 1 + 50 + 1 + prec = 52 + prec
3626
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003627 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00003628 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003629
3630 */
3631 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3632 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003633 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003634 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003635 return -1;
3636 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003637 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3638 (flags&F_ALT) ? "#" : "",
3639 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003640 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003641 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003642}
3643
Tim Peters38fd5b62000-09-21 05:43:11 +00003644/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3645 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3646 * Python's regular ints.
3647 * Return value: a new PyString*, or NULL if error.
3648 * . *pbuf is set to point into it,
3649 * *plen set to the # of chars following that.
3650 * Caller must decref it when done using pbuf.
3651 * The string starting at *pbuf is of the form
3652 * "-"? ("0x" | "0X")? digit+
3653 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003654 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003655 * There will be at least prec digits, zero-filled on the left if
3656 * necessary to get that many.
3657 * val object to be converted
3658 * flags bitmask of format flags; only F_ALT is looked at
3659 * prec minimum number of digits; 0-fill on left if needed
3660 * type a character in [duoxX]; u acts the same as d
3661 *
3662 * CAUTION: o, x and X conversions on regular ints can never
3663 * produce a '-' sign, but can for Python's unbounded ints.
3664 */
3665PyObject*
3666_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3667 char **pbuf, int *plen)
3668{
3669 PyObject *result = NULL;
3670 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003671 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003672 int sign; /* 1 if '-', else 0 */
3673 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003674 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003675 int numdigits; /* len == numnondigits + numdigits */
3676 int numnondigits = 0;
3677
3678 switch (type) {
3679 case 'd':
3680 case 'u':
3681 result = val->ob_type->tp_str(val);
3682 break;
3683 case 'o':
3684 result = val->ob_type->tp_as_number->nb_oct(val);
3685 break;
3686 case 'x':
3687 case 'X':
3688 numnondigits = 2;
3689 result = val->ob_type->tp_as_number->nb_hex(val);
3690 break;
3691 default:
3692 assert(!"'type' not in [duoxX]");
3693 }
3694 if (!result)
3695 return NULL;
3696
3697 /* To modify the string in-place, there can only be one reference. */
3698 if (result->ob_refcnt != 1) {
3699 PyErr_BadInternalCall();
3700 return NULL;
3701 }
3702 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00003703 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003704 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00003705 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
3706 return NULL;
3707 }
3708 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003709 if (buf[len-1] == 'L') {
3710 --len;
3711 buf[len] = '\0';
3712 }
3713 sign = buf[0] == '-';
3714 numnondigits += sign;
3715 numdigits = len - numnondigits;
3716 assert(numdigits > 0);
3717
Tim Petersfff53252001-04-12 18:38:48 +00003718 /* Get rid of base marker unless F_ALT */
3719 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003720 /* Need to skip 0x, 0X or 0. */
3721 int skipped = 0;
3722 switch (type) {
3723 case 'o':
3724 assert(buf[sign] == '0');
3725 /* If 0 is only digit, leave it alone. */
3726 if (numdigits > 1) {
3727 skipped = 1;
3728 --numdigits;
3729 }
3730 break;
3731 case 'x':
3732 case 'X':
3733 assert(buf[sign] == '0');
3734 assert(buf[sign + 1] == 'x');
3735 skipped = 2;
3736 numnondigits -= 2;
3737 break;
3738 }
3739 if (skipped) {
3740 buf += skipped;
3741 len -= skipped;
3742 if (sign)
3743 buf[0] = '-';
3744 }
3745 assert(len == numnondigits + numdigits);
3746 assert(numdigits > 0);
3747 }
3748
3749 /* Fill with leading zeroes to meet minimum width. */
3750 if (prec > numdigits) {
3751 PyObject *r1 = PyString_FromStringAndSize(NULL,
3752 numnondigits + prec);
3753 char *b1;
3754 if (!r1) {
3755 Py_DECREF(result);
3756 return NULL;
3757 }
3758 b1 = PyString_AS_STRING(r1);
3759 for (i = 0; i < numnondigits; ++i)
3760 *b1++ = *buf++;
3761 for (i = 0; i < prec - numdigits; i++)
3762 *b1++ = '0';
3763 for (i = 0; i < numdigits; i++)
3764 *b1++ = *buf++;
3765 *b1 = '\0';
3766 Py_DECREF(result);
3767 result = r1;
3768 buf = PyString_AS_STRING(result);
3769 len = numnondigits + prec;
3770 }
3771
3772 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003773 if (type == 'X') {
3774 /* Need to convert all lower case letters to upper case.
3775 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003776 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003777 if (buf[i] >= 'a' && buf[i] <= 'x')
3778 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003779 }
3780 *pbuf = buf;
3781 *plen = len;
3782 return result;
3783}
3784
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003785static int
Fred Drakeba096332000-07-09 07:04:36 +00003786formatint(char *buf, size_t buflen, int flags,
3787 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003788{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003789 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003790 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3791 + 1 + 1 = 24 */
3792 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003793 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003794 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003795
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003796 x = PyInt_AsLong(v);
3797 if (x == -1 && PyErr_Occurred()) {
3798 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003799 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003800 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003801 if (x < 0 && type == 'u') {
3802 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003803 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003804 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3805 sign = "-";
3806 else
3807 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003808 if (prec < 0)
3809 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003810
3811 if ((flags & F_ALT) &&
3812 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003813 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003814 * of issues that cause pain:
3815 * - when 0 is being converted, the C standard leaves off
3816 * the '0x' or '0X', which is inconsistent with other
3817 * %#x/%#X conversions and inconsistent with Python's
3818 * hex() function
3819 * - there are platforms that violate the standard and
3820 * convert 0 with the '0x' or '0X'
3821 * (Metrowerks, Compaq Tru64)
3822 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003823 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003824 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003825 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003826 * We can achieve the desired consistency by inserting our
3827 * own '0x' or '0X' prefix, and substituting %x/%X in place
3828 * of %#x/%#X.
3829 *
3830 * Note that this is the same approach as used in
3831 * formatint() in unicodeobject.c
3832 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003833 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3834 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003835 }
3836 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003837 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3838 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003839 prec, type);
3840 }
3841
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003842 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3843 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003844 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003845 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003846 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003847 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003848 return -1;
3849 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003850 if (sign[0])
3851 PyOS_snprintf(buf, buflen, fmt, -x);
3852 else
3853 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003854 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003855}
3856
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003857static int
Fred Drakeba096332000-07-09 07:04:36 +00003858formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003859{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003860 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003861 if (PyString_Check(v)) {
3862 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003863 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003864 }
3865 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003866 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003867 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003868 }
3869 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003870 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003871}
3872
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003873/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3874
3875 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3876 chars are formatted. XXX This is a magic number. Each formatting
3877 routine does bounds checking to ensure no overflow, but a better
3878 solution may be to malloc a buffer of appropriate size for each
3879 format. For now, the current solution is sufficient.
3880*/
3881#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003882
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003883PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003884PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003885{
3886 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003887 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003888 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003889 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003890 PyObject *result, *orig_args;
3891#ifdef Py_USING_UNICODE
3892 PyObject *v, *w;
3893#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003894 PyObject *dict = NULL;
3895 if (format == NULL || !PyString_Check(format) || args == NULL) {
3896 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003897 return NULL;
3898 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003899 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003900 fmt = PyString_AS_STRING(format);
3901 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003902 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003903 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003904 if (result == NULL)
3905 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003906 res = PyString_AsString(result);
3907 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003908 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003909 argidx = 0;
3910 }
3911 else {
3912 arglen = -1;
3913 argidx = -2;
3914 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003915 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3916 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003917 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003918 while (--fmtcnt >= 0) {
3919 if (*fmt != '%') {
3920 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003921 rescnt = fmtcnt + 100;
3922 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003923 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003924 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003925 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003926 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003927 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003928 }
3929 *res++ = *fmt++;
3930 }
3931 else {
3932 /* Got a format specifier */
3933 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003934 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003935 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003936 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003937 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003938 PyObject *v = NULL;
3939 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003940 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003941 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003942 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003943 char formatbuf[FORMATBUFLEN];
3944 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003945#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003946 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003947 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003948#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003949
Guido van Rossumda9c2711996-12-05 21:58:58 +00003950 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003951 if (*fmt == '(') {
3952 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003953 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003954 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003955 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003956
3957 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003958 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003959 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003960 goto error;
3961 }
3962 ++fmt;
3963 --fmtcnt;
3964 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003965 /* Skip over balanced parentheses */
3966 while (pcount > 0 && --fmtcnt >= 0) {
3967 if (*fmt == ')')
3968 --pcount;
3969 else if (*fmt == '(')
3970 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003971 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003972 }
3973 keylen = fmt - keystart - 1;
3974 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003975 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003976 "incomplete format key");
3977 goto error;
3978 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003979 key = PyString_FromStringAndSize(keystart,
3980 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003981 if (key == NULL)
3982 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003983 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003984 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003985 args_owned = 0;
3986 }
3987 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003988 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003989 if (args == NULL) {
3990 goto error;
3991 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003992 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003993 arglen = -1;
3994 argidx = -2;
3995 }
Guido van Rossume5372401993-03-16 12:15:04 +00003996 while (--fmtcnt >= 0) {
3997 switch (c = *fmt++) {
3998 case '-': flags |= F_LJUST; continue;
3999 case '+': flags |= F_SIGN; continue;
4000 case ' ': flags |= F_BLANK; continue;
4001 case '#': flags |= F_ALT; continue;
4002 case '0': flags |= F_ZERO; continue;
4003 }
4004 break;
4005 }
4006 if (c == '*') {
4007 v = getnextarg(args, arglen, &argidx);
4008 if (v == NULL)
4009 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004010 if (!PyInt_Check(v)) {
4011 PyErr_SetString(PyExc_TypeError,
4012 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004013 goto error;
4014 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004015 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004016 if (width < 0) {
4017 flags |= F_LJUST;
4018 width = -width;
4019 }
Guido van Rossume5372401993-03-16 12:15:04 +00004020 if (--fmtcnt >= 0)
4021 c = *fmt++;
4022 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004023 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004024 width = c - '0';
4025 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004026 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004027 if (!isdigit(c))
4028 break;
4029 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004030 PyErr_SetString(
4031 PyExc_ValueError,
4032 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004033 goto error;
4034 }
4035 width = width*10 + (c - '0');
4036 }
4037 }
4038 if (c == '.') {
4039 prec = 0;
4040 if (--fmtcnt >= 0)
4041 c = *fmt++;
4042 if (c == '*') {
4043 v = getnextarg(args, arglen, &argidx);
4044 if (v == NULL)
4045 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004046 if (!PyInt_Check(v)) {
4047 PyErr_SetString(
4048 PyExc_TypeError,
4049 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004050 goto error;
4051 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004052 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004053 if (prec < 0)
4054 prec = 0;
4055 if (--fmtcnt >= 0)
4056 c = *fmt++;
4057 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004058 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004059 prec = c - '0';
4060 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004061 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004062 if (!isdigit(c))
4063 break;
4064 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004065 PyErr_SetString(
4066 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004067 "prec too big");
4068 goto error;
4069 }
4070 prec = prec*10 + (c - '0');
4071 }
4072 }
4073 } /* prec */
4074 if (fmtcnt >= 0) {
4075 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004076 if (--fmtcnt >= 0)
4077 c = *fmt++;
4078 }
4079 }
4080 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004081 PyErr_SetString(PyExc_ValueError,
4082 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004083 goto error;
4084 }
4085 if (c != '%') {
4086 v = getnextarg(args, arglen, &argidx);
4087 if (v == NULL)
4088 goto error;
4089 }
4090 sign = 0;
4091 fill = ' ';
4092 switch (c) {
4093 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004094 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004095 len = 1;
4096 break;
4097 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004098#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004099 if (PyUnicode_Check(v)) {
4100 fmt = fmt_start;
4101 argidx = argidx_start;
4102 goto unicode;
4103 }
Georg Brandld45014b2005-10-01 17:06:00 +00004104#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004105 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004106#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004107 if (temp != NULL && PyUnicode_Check(temp)) {
4108 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004109 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004110 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004111 goto unicode;
4112 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004113#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004114 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004115 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004116 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004117 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004118 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004119 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004120 if (!PyString_Check(temp)) {
4121 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004122 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004123 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004124 goto error;
4125 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004126 pbuf = PyString_AS_STRING(temp);
4127 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004128 if (prec >= 0 && len > prec)
4129 len = prec;
4130 break;
4131 case 'i':
4132 case 'd':
4133 case 'u':
4134 case 'o':
4135 case 'x':
4136 case 'X':
4137 if (c == 'i')
4138 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004139 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004140 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004141 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004142 prec, c, &pbuf, &ilen);
4143 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004144 if (!temp)
4145 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004146 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004147 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004148 else {
4149 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004150 len = formatint(pbuf,
4151 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004152 flags, prec, c, v);
4153 if (len < 0)
4154 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004155 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004156 }
4157 if (flags & F_ZERO)
4158 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004159 break;
4160 case 'e':
4161 case 'E':
4162 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004163 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004164 case 'g':
4165 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004166 if (c == 'F')
4167 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004168 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004169 len = formatfloat(pbuf, sizeof(formatbuf),
4170 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004171 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004172 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004173 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004174 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004175 fill = '0';
4176 break;
4177 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004178#ifdef Py_USING_UNICODE
4179 if (PyUnicode_Check(v)) {
4180 fmt = fmt_start;
4181 argidx = argidx_start;
4182 goto unicode;
4183 }
4184#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004185 pbuf = formatbuf;
4186 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004187 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004188 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004189 break;
4190 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004191 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004192 "unsupported format character '%c' (0x%x) "
4193 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004194 c, c,
4195 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004196 goto error;
4197 }
4198 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004199 if (*pbuf == '-' || *pbuf == '+') {
4200 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004201 len--;
4202 }
4203 else if (flags & F_SIGN)
4204 sign = '+';
4205 else if (flags & F_BLANK)
4206 sign = ' ';
4207 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004208 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004209 }
4210 if (width < len)
4211 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004212 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004213 reslen -= rescnt;
4214 rescnt = width + fmtcnt + 100;
4215 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004216 if (reslen < 0) {
4217 Py_DECREF(result);
4218 return PyErr_NoMemory();
4219 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004220 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004221 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004222 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004223 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004224 }
4225 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004226 if (fill != ' ')
4227 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004228 rescnt--;
4229 if (width > len)
4230 width--;
4231 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004232 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4233 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004234 assert(pbuf[1] == c);
4235 if (fill != ' ') {
4236 *res++ = *pbuf++;
4237 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004238 }
Tim Petersfff53252001-04-12 18:38:48 +00004239 rescnt -= 2;
4240 width -= 2;
4241 if (width < 0)
4242 width = 0;
4243 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004244 }
4245 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004246 do {
4247 --rescnt;
4248 *res++ = fill;
4249 } while (--width > len);
4250 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004251 if (fill == ' ') {
4252 if (sign)
4253 *res++ = sign;
4254 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004255 (c == 'x' || c == 'X')) {
4256 assert(pbuf[0] == '0');
4257 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004258 *res++ = *pbuf++;
4259 *res++ = *pbuf++;
4260 }
4261 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004262 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004263 res += len;
4264 rescnt -= len;
4265 while (--width >= len) {
4266 --rescnt;
4267 *res++ = ' ';
4268 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004269 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004270 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004271 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004272 goto error;
4273 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004274 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004275 } /* '%' */
4276 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004277 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004278 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004279 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004280 goto error;
4281 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004282 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004283 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004284 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004285 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004286 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004287
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004288#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004289 unicode:
4290 if (args_owned) {
4291 Py_DECREF(args);
4292 args_owned = 0;
4293 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004294 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004295 if (PyTuple_Check(orig_args) && argidx > 0) {
4296 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004297 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004298 v = PyTuple_New(n);
4299 if (v == NULL)
4300 goto error;
4301 while (--n >= 0) {
4302 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4303 Py_INCREF(w);
4304 PyTuple_SET_ITEM(v, n, w);
4305 }
4306 args = v;
4307 } else {
4308 Py_INCREF(orig_args);
4309 args = orig_args;
4310 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004311 args_owned = 1;
4312 /* Take what we have of the result and let the Unicode formatting
4313 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004314 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004315 if (_PyString_Resize(&result, rescnt))
4316 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004317 fmtcnt = PyString_GET_SIZE(format) - \
4318 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004319 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4320 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004321 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004322 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004323 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004324 if (v == NULL)
4325 goto error;
4326 /* Paste what we have (result) to what the Unicode formatting
4327 function returned (v) and return the result (or error) */
4328 w = PyUnicode_Concat(result, v);
4329 Py_DECREF(result);
4330 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004331 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004332 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004333#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004334
Guido van Rossume5372401993-03-16 12:15:04 +00004335 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004336 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004337 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004338 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004339 }
Guido van Rossume5372401993-03-16 12:15:04 +00004340 return NULL;
4341}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004342
Guido van Rossum2a61e741997-01-18 07:55:05 +00004343void
Fred Drakeba096332000-07-09 07:04:36 +00004344PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004345{
4346 register PyStringObject *s = (PyStringObject *)(*p);
4347 PyObject *t;
4348 if (s == NULL || !PyString_Check(s))
4349 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004350 /* If it's a string subclass, we don't really know what putting
4351 it in the interned dict might do. */
4352 if (!PyString_CheckExact(s))
4353 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004354 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004355 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004356 if (interned == NULL) {
4357 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004358 if (interned == NULL) {
4359 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004360 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004361 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004362 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004363 t = PyDict_GetItem(interned, (PyObject *)s);
4364 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004365 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004366 Py_DECREF(*p);
4367 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004368 return;
4369 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004370
Armin Rigo79f7ad22004-08-07 19:27:39 +00004371 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004372 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004373 return;
4374 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004375 /* The two references in interned are not counted by refcnt.
4376 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004377 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004378 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004379}
4380
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004381void
4382PyString_InternImmortal(PyObject **p)
4383{
4384 PyString_InternInPlace(p);
4385 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4386 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4387 Py_INCREF(*p);
4388 }
4389}
4390
Guido van Rossum2a61e741997-01-18 07:55:05 +00004391
4392PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004393PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004394{
4395 PyObject *s = PyString_FromString(cp);
4396 if (s == NULL)
4397 return NULL;
4398 PyString_InternInPlace(&s);
4399 return s;
4400}
4401
Guido van Rossum8cf04761997-08-02 02:57:45 +00004402void
Fred Drakeba096332000-07-09 07:04:36 +00004403PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004404{
4405 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004406 for (i = 0; i < UCHAR_MAX + 1; i++) {
4407 Py_XDECREF(characters[i]);
4408 characters[i] = NULL;
4409 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004410 Py_XDECREF(nullstring);
4411 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004412}
Barry Warsawa903ad982001-02-23 16:40:48 +00004413
Barry Warsawa903ad982001-02-23 16:40:48 +00004414void _Py_ReleaseInternedStrings(void)
4415{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004416 PyObject *keys;
4417 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004418 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004419
4420 if (interned == NULL || !PyDict_Check(interned))
4421 return;
4422 keys = PyDict_Keys(interned);
4423 if (keys == NULL || !PyList_Check(keys)) {
4424 PyErr_Clear();
4425 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004426 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004427
4428 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4429 detector, interned strings are not forcibly deallocated; rather, we
4430 give them their stolen references back, and then clear and DECREF
4431 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004432
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004433 fprintf(stderr, "releasing interned strings\n");
4434 n = PyList_GET_SIZE(keys);
4435 for (i = 0; i < n; i++) {
4436 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4437 switch (s->ob_sstate) {
4438 case SSTATE_NOT_INTERNED:
4439 /* XXX Shouldn't happen */
4440 break;
4441 case SSTATE_INTERNED_IMMORTAL:
4442 s->ob_refcnt += 1;
4443 break;
4444 case SSTATE_INTERNED_MORTAL:
4445 s->ob_refcnt += 2;
4446 break;
4447 default:
4448 Py_FatalError("Inconsistent interned string state.");
4449 }
4450 s->ob_sstate = SSTATE_NOT_INTERNED;
4451 }
4452 Py_DECREF(keys);
4453 PyDict_Clear(interned);
4454 Py_DECREF(interned);
4455 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004456}