blob: 32e825ebe615144617aa4dedfee71d744073fc5d [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
#ifdef COUNT_ALLOCS
/* Statistics: bumped each time the cached empty string / a cached
   one-character string is handed out instead of a fresh allocation. */
int null_strings, one_strings;
#endif

/* Cache of shared one-character string objects, indexed by the byte value
   (`*str & UCHAR_MAX`).  Entries are NULL until first created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string object; NULL until the first empty string is
   created. */
static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
24
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
   For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
82 memcpy(op->ob_sval, str, size);
83 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000136 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
164 memcpy(count, vargs, sizeof(va_list));
165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
179 /* skip the 'l' in %ld, since it doesn't change the
180 width. although only %d is supported (see
181 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000182 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000183 if (*f == 'l' && *(f+1) == 'd')
184 ++f;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000185 /* likewise for %zd */
186 if (*f == 'z' && *(f+1) == 'd')
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000187 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000188
Barry Warsawdadace02001-08-24 18:32:06 +0000189 switch (*f) {
190 case 'c':
191 (void)va_arg(count, int);
192 /* fall through... */
193 case '%':
194 n++;
195 break;
196 case 'd': case 'i': case 'x':
197 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000198 /* 20 bytes is enough to hold a 64-bit
199 integer. Decimal takes the most space.
200 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000201 n += 20;
202 break;
203 case 's':
204 s = va_arg(count, char*);
205 n += strlen(s);
206 break;
207 case 'p':
208 (void) va_arg(count, int);
209 /* maximum 64-bit pointer representation:
210 * 0xffffffffffffffff
211 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000212 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000213 */
214 n += 19;
215 break;
216 default:
217 /* if we stumble upon an unknown
218 formatting code, copy the rest of
219 the format string to the output
220 string. (we cannot just skip the
221 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000222 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000223 n += strlen(p);
224 goto expand;
225 }
226 } else
227 n++;
228 }
229 expand:
230 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000231 /* Since we've analyzed how much space we need for the worst case,
232 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000233 string = PyString_FromStringAndSize(NULL, n);
234 if (!string)
235 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000236
Barry Warsawdadace02001-08-24 18:32:06 +0000237 s = PyString_AsString(string);
238
239 for (f = format; *f; f++) {
240 if (*f == '%') {
241 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000242 Py_ssize_t i;
243 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000244 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000245 /* parse the width.precision part (we're only
246 interested in the precision value, if any) */
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 if (*f == '.') {
251 f++;
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 }
256 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
257 f++;
258 /* handle the long flag, but only for %ld. others
259 can be added when necessary. */
260 if (*f == 'l' && *(f+1) == 'd') {
261 longflag = 1;
262 ++f;
263 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000264 /* handle the size_t flag. */
265 if (*f == 'z' && *(f+1) == 'd') {
266 size_tflag = 1;
267 ++f;
268 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000269
Barry Warsawdadace02001-08-24 18:32:06 +0000270 switch (*f) {
271 case 'c':
272 *s++ = va_arg(vargs, int);
273 break;
274 case 'd':
275 if (longflag)
276 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000277 else if (size_tflag)
278 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
279 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000280 else
281 sprintf(s, "%d", va_arg(vargs, int));
282 s += strlen(s);
283 break;
284 case 'i':
285 sprintf(s, "%i", va_arg(vargs, int));
286 s += strlen(s);
287 break;
288 case 'x':
289 sprintf(s, "%x", va_arg(vargs, int));
290 s += strlen(s);
291 break;
292 case 's':
293 p = va_arg(vargs, char*);
294 i = strlen(p);
295 if (n > 0 && i > n)
296 i = n;
297 memcpy(s, p, i);
298 s += i;
299 break;
300 case 'p':
301 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000302 /* %p is ill-defined: ensure leading 0x. */
303 if (s[1] == 'X')
304 s[1] = 'x';
305 else if (s[1] != 'x') {
306 memmove(s+2, s, strlen(s)+1);
307 s[0] = '0';
308 s[1] = 'x';
309 }
Barry Warsawdadace02001-08-24 18:32:06 +0000310 s += strlen(s);
311 break;
312 case '%':
313 *s++ = '%';
314 break;
315 default:
316 strcpy(s, p);
317 s += strlen(s);
318 goto end;
319 }
320 } else
321 *s++ = *f;
322 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000323
Barry Warsawdadace02001-08-24 18:32:06 +0000324 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000325 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000326 return string;
327}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000328
Barry Warsawdadace02001-08-24 18:32:06 +0000329PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000330PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000331{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000332 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000333 va_list vargs;
334
335#ifdef HAVE_STDARG_PROTOTYPES
336 va_start(vargs, format);
337#else
338 va_start(vargs);
339#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000340 ret = PyString_FromFormatV(format, vargs);
341 va_end(vargs);
342 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000343}
344
345
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000346PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000347 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000348 const char *encoding,
349 const char *errors)
350{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000351 PyObject *v, *str;
352
353 str = PyString_FromStringAndSize(s, size);
354 if (str == NULL)
355 return NULL;
356 v = PyString_AsDecodedString(str, encoding, errors);
357 Py_DECREF(str);
358 return v;
359}
360
361PyObject *PyString_AsDecodedObject(PyObject *str,
362 const char *encoding,
363 const char *errors)
364{
365 PyObject *v;
366
367 if (!PyString_Check(str)) {
368 PyErr_BadArgument();
369 goto onError;
370 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000371
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000372 if (encoding == NULL) {
373#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000374 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000375#else
376 PyErr_SetString(PyExc_ValueError, "no encoding specified");
377 goto onError;
378#endif
379 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000380
381 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000382 v = PyCodec_Decode(str, encoding, errors);
383 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000384 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000385
386 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000387
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389 return NULL;
390}
391
392PyObject *PyString_AsDecodedString(PyObject *str,
393 const char *encoding,
394 const char *errors)
395{
396 PyObject *v;
397
398 v = PyString_AsDecodedObject(str, encoding, errors);
399 if (v == NULL)
400 goto onError;
401
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 /* Convert Unicode to a string using the default encoding */
404 if (PyUnicode_Check(v)) {
405 PyObject *temp = v;
406 v = PyUnicode_AsEncodedString(v, NULL, NULL);
407 Py_DECREF(temp);
408 if (v == NULL)
409 goto onError;
410 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 if (!PyString_Check(v)) {
413 PyErr_Format(PyExc_TypeError,
414 "decoder did not return a string object (type=%.400s)",
415 v->ob_type->tp_name);
416 Py_DECREF(v);
417 goto onError;
418 }
419
420 return v;
421
422 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000423 return NULL;
424}
425
426PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000427 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000428 const char *encoding,
429 const char *errors)
430{
431 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000432
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 str = PyString_FromStringAndSize(s, size);
434 if (str == NULL)
435 return NULL;
436 v = PyString_AsEncodedString(str, encoding, errors);
437 Py_DECREF(str);
438 return v;
439}
440
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000441PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 const char *encoding,
443 const char *errors)
444{
445 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000446
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000447 if (!PyString_Check(str)) {
448 PyErr_BadArgument();
449 goto onError;
450 }
451
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000452 if (encoding == NULL) {
453#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000454 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000455#else
456 PyErr_SetString(PyExc_ValueError, "no encoding specified");
457 goto onError;
458#endif
459 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000460
461 /* Encode via the codec registry */
462 v = PyCodec_Encode(str, encoding, errors);
463 if (v == NULL)
464 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000465
466 return v;
467
468 onError:
469 return NULL;
470}
471
472PyObject *PyString_AsEncodedString(PyObject *str,
473 const char *encoding,
474 const char *errors)
475{
476 PyObject *v;
477
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000478 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000479 if (v == NULL)
480 goto onError;
481
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 /* Convert Unicode to a string using the default encoding */
484 if (PyUnicode_Check(v)) {
485 PyObject *temp = v;
486 v = PyUnicode_AsEncodedString(v, NULL, NULL);
487 Py_DECREF(temp);
488 if (v == NULL)
489 goto onError;
490 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 if (!PyString_Check(v)) {
493 PyErr_Format(PyExc_TypeError,
494 "encoder did not return a string object (type=%.400s)",
495 v->ob_type->tp_name);
496 Py_DECREF(v);
497 goto onError;
498 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000499
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000500 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000501
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000502 onError:
503 return NULL;
504}
505
Guido van Rossum234f9421993-06-17 12:35:49 +0000506static void
Fred Drakeba096332000-07-09 07:04:36 +0000507string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000508{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000509 switch (PyString_CHECK_INTERNED(op)) {
510 case SSTATE_NOT_INTERNED:
511 break;
512
513 case SSTATE_INTERNED_MORTAL:
514 /* revive dead object temporarily for DelItem */
515 op->ob_refcnt = 3;
516 if (PyDict_DelItem(interned, op) != 0)
517 Py_FatalError(
518 "deletion of interned string failed");
519 break;
520
521 case SSTATE_INTERNED_IMMORTAL:
522 Py_FatalError("Immortal interned string died.");
523
524 default:
525 Py_FatalError("Inconsistent interned string state.");
526 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000527 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000528}
529
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000530/* Unescape a backslash-escaped string. If unicode is non-zero,
531 the string is a u-literal. If recode_encoding is non-zero,
532 the string is UTF-8 encoded and should be re-encoded in the
533 specified encoding. */
534
535PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000536 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000537 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000538 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539 const char *recode_encoding)
540{
541 int c;
542 char *p, *buf;
543 const char *end;
544 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000546 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 if (v == NULL)
548 return NULL;
549 p = buf = PyString_AsString(v);
550 end = s + len;
551 while (s < end) {
552 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000553 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554#ifdef Py_USING_UNICODE
555 if (recode_encoding && (*s & 0x80)) {
556 PyObject *u, *w;
557 char *r;
558 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000559 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000560 t = s;
561 /* Decode non-ASCII bytes as UTF-8. */
562 while (t < end && (*t & 0x80)) t++;
563 u = PyUnicode_DecodeUTF8(s, t - s, errors);
564 if(!u) goto failed;
565
566 /* Recode them in target encoding. */
567 w = PyUnicode_AsEncodedString(
568 u, recode_encoding, errors);
569 Py_DECREF(u);
570 if (!w) goto failed;
571
572 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000573 assert(PyString_Check(w));
574 r = PyString_AS_STRING(w);
575 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000576 memcpy(p, r, rn);
577 p += rn;
578 Py_DECREF(w);
579 s = t;
580 } else {
581 *p++ = *s++;
582 }
583#else
584 *p++ = *s++;
585#endif
586 continue;
587 }
588 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000589 if (s==end) {
590 PyErr_SetString(PyExc_ValueError,
591 "Trailing \\ in string");
592 goto failed;
593 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000594 switch (*s++) {
595 /* XXX This assumes ASCII! */
596 case '\n': break;
597 case '\\': *p++ = '\\'; break;
598 case '\'': *p++ = '\''; break;
599 case '\"': *p++ = '\"'; break;
600 case 'b': *p++ = '\b'; break;
601 case 'f': *p++ = '\014'; break; /* FF */
602 case 't': *p++ = '\t'; break;
603 case 'n': *p++ = '\n'; break;
604 case 'r': *p++ = '\r'; break;
605 case 'v': *p++ = '\013'; break; /* VT */
606 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
607 case '0': case '1': case '2': case '3':
608 case '4': case '5': case '6': case '7':
609 c = s[-1] - '0';
610 if ('0' <= *s && *s <= '7') {
611 c = (c<<3) + *s++ - '0';
612 if ('0' <= *s && *s <= '7')
613 c = (c<<3) + *s++ - '0';
614 }
615 *p++ = c;
616 break;
617 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000618 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000619 && isxdigit(Py_CHARMASK(s[1]))) {
620 unsigned int x = 0;
621 c = Py_CHARMASK(*s);
622 s++;
623 if (isdigit(c))
624 x = c - '0';
625 else if (islower(c))
626 x = 10 + c - 'a';
627 else
628 x = 10 + c - 'A';
629 x = x << 4;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x += c - '0';
634 else if (islower(c))
635 x += 10 + c - 'a';
636 else
637 x += 10 + c - 'A';
638 *p++ = x;
639 break;
640 }
641 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000642 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000643 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000644 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000645 }
646 if (strcmp(errors, "replace") == 0) {
647 *p++ = '?';
648 } else if (strcmp(errors, "ignore") == 0)
649 /* do nothing */;
650 else {
651 PyErr_Format(PyExc_ValueError,
652 "decoding error; "
653 "unknown error handling code: %.400s",
654 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657#ifndef Py_USING_UNICODE
658 case 'u':
659 case 'U':
660 case 'N':
661 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000662 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 "Unicode escapes not legal "
664 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000665 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000666 }
667#endif
668 default:
669 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000670 s--;
671 goto non_esc; /* an arbitry number of unescaped
672 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 }
674 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000675 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000676 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 return v;
678 failed:
679 Py_DECREF(v);
680 return NULL;
681}
682
Martin v. Löwis18e16552006-02-15 17:27:45 +0000683static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000684string_getsize(register PyObject *op)
685{
686 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000687 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000688 if (PyString_AsStringAndSize(op, &s, &len))
689 return -1;
690 return len;
691}
692
693static /*const*/ char *
694string_getbuffer(register PyObject *op)
695{
696 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698 if (PyString_AsStringAndSize(op, &s, &len))
699 return NULL;
700 return s;
701}
702
Martin v. Löwis18e16552006-02-15 17:27:45 +0000703Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000704PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000705{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706 if (!PyString_Check(op))
707 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000708 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709}
710
711/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000712PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000713{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000714 if (!PyString_Check(op))
715 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717}
718
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000719int
720PyString_AsStringAndSize(register PyObject *obj,
721 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000722 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723{
724 if (s == NULL) {
725 PyErr_BadInternalCall();
726 return -1;
727 }
728
729 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000730#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731 if (PyUnicode_Check(obj)) {
732 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
733 if (obj == NULL)
734 return -1;
735 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000736 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000737#endif
738 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000739 PyErr_Format(PyExc_TypeError,
740 "expected string or Unicode object, "
741 "%.200s found", obj->ob_type->tp_name);
742 return -1;
743 }
744 }
745
746 *s = PyString_AS_STRING(obj);
747 if (len != NULL)
748 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000749 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000750 PyErr_SetString(PyExc_TypeError,
751 "expected string without null bytes");
752 return -1;
753 }
754 return 0;
755}
756
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000757/* Methods */
758
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000759static int
Fred Drakeba096332000-07-09 07:04:36 +0000760string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000761{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000762 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000763 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000764 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000765
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000766 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000767 if (! PyString_CheckExact(op)) {
768 int ret;
769 /* A str subclass may have its own __str__ method. */
770 op = (PyStringObject *) PyObject_Str((PyObject *)op);
771 if (op == NULL)
772 return -1;
773 ret = string_print(op, fp, flags);
774 Py_DECREF(op);
775 return ret;
776 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000777 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000778#ifdef __VMS
779 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
780#else
781 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
782#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000783 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000784 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785
Thomas Wouters7e474022000-07-16 12:04:32 +0000786 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000787 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000788 if (memchr(op->ob_sval, '\'', op->ob_size) &&
789 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000790 quote = '"';
791
792 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 for (i = 0; i < op->ob_size; i++) {
794 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000796 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000797 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000798 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000799 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000800 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000801 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000802 fprintf(fp, "\\r");
803 else if (c < ' ' || c >= 0x7f)
804 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000805 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000806 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000807 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000808 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000809 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000810}
811
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000812PyObject *
813PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000815 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000816 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000817 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000818 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000819 PyErr_SetString(PyExc_OverflowError,
820 "string is too large to make repr");
821 }
822 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000824 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 }
826 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000827 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 register char c;
829 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830 int quote;
831
Thomas Wouters7e474022000-07-16 12:04:32 +0000832 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833 quote = '\'';
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000834 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000835 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000836 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 quote = '"';
838
Tim Peters9161c8b2001-12-03 01:55:38 +0000839 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000840 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000842 /* There's at least enough room for a hex escape
843 and a closing quote. */
844 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000846 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000848 else if (c == '\t')
849 *p++ = '\\', *p++ = 't';
850 else if (c == '\n')
851 *p++ = '\\', *p++ = 'n';
852 else if (c == '\r')
853 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000854 else if (c < ' ' || c >= 0x7f) {
855 /* For performance, we don't want to call
856 PyOS_snprintf here (extra layers of
857 function call). */
858 sprintf(p, "\\x%02x", c & 0xff);
859 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000860 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000861 else
862 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000863 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000864 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000865 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000867 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000868 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000869 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000871}
872
Guido van Rossum189f1df2001-05-01 16:51:53 +0000873static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000874string_repr(PyObject *op)
875{
876 return PyString_Repr(op, 1);
877}
878
879static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000880string_str(PyObject *s)
881{
Tim Petersc9933152001-10-16 20:18:24 +0000882 assert(PyString_Check(s));
883 if (PyString_CheckExact(s)) {
884 Py_INCREF(s);
885 return s;
886 }
887 else {
888 /* Subtype -- return genuine string with the same value. */
889 PyStringObject *t = (PyStringObject *) s;
890 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
891 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000892}
893
Martin v. Löwis18e16552006-02-15 17:27:45 +0000894static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000895string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896{
897 return a->ob_size;
898}
899
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000900static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000901string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000902{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000903 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000904 register PyStringObject *op;
905 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000906#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000907 if (PyUnicode_Check(bb))
908 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000909#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000910 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000911 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000912 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000913 return NULL;
914 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000915#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000916 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000917 if ((a->ob_size == 0 || b->ob_size == 0) &&
918 PyString_CheckExact(a) && PyString_CheckExact(b)) {
919 if (a->ob_size == 0) {
920 Py_INCREF(bb);
921 return bb;
922 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000923 Py_INCREF(a);
924 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925 }
926 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000927 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000928 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000929 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000930 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000931 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000932 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000933 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000934 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
936 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000937 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939#undef b
940}
941
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000942static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000943string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000944{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000945 register Py_ssize_t i;
946 register Py_ssize_t j;
947 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000948 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000949 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950 if (n < 0)
951 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000952 /* watch out for overflows: the size can overflow int,
953 * and the # of bytes needed can overflow size_t
954 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000955 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000956 if (n && size / n != a->ob_size) {
957 PyErr_SetString(PyExc_OverflowError,
958 "repeated string is too long");
959 return NULL;
960 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000961 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962 Py_INCREF(a);
963 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 }
Tim Peterse7c05322004-06-27 17:24:49 +0000965 nbytes = (size_t)size;
966 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000967 PyErr_SetString(PyExc_OverflowError,
968 "repeated string is too long");
969 return NULL;
970 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000972 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000973 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000974 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000975 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000976 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000977 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000978 op->ob_sval[size] = '\0';
979 if (a->ob_size == 1 && n > 0) {
980 memset(op->ob_sval, a->ob_sval[0] , n);
981 return (PyObject *) op;
982 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000983 i = 0;
984 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000985 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
986 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000987 }
988 while (i < size) {
989 j = (i <= size-i) ? i : size-i;
990 memcpy(op->ob_sval+i, op->ob_sval, j);
991 i += j;
992 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000993 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000994}
995
996/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
997
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000998static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000999string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001000 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001001 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001002{
1003 if (i < 0)
1004 i = 0;
1005 if (j < 0)
1006 j = 0; /* Avoid signed/unsigned bug in next line */
1007 if (j > a->ob_size)
1008 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001009 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1010 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001011 Py_INCREF(a);
1012 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001013 }
1014 if (j < i)
1015 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001016 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001017}
1018
Guido van Rossum9284a572000-03-07 15:53:43 +00001019static int
Fred Drakeba096332000-07-09 07:04:36 +00001020string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001021{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001022 char *s = PyString_AS_STRING(a);
1023 const char *sub = PyString_AS_STRING(el);
1024 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001025 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001026 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001027 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001028
1029 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001030#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001031 if (PyUnicode_Check(el))
1032 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001033#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001034 if (!PyString_Check(el)) {
1035 PyErr_SetString(PyExc_TypeError,
1036 "'in <string>' requires string as left operand");
1037 return -1;
1038 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001039 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001040
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001041 if (len_sub == 0)
1042 return 1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001043 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001044 substring. When s<last, there is still room for a possible match
1045 and s[0] through s[len_sub-1] will be in bounds.
1046 shortsub is len_sub minus the last character which is checked
1047 separately just before the memcmp(). That check helps prevent
1048 false starts and saves the setup time for memcmp().
1049 */
1050 firstchar = sub[0];
1051 shortsub = len_sub - 1;
1052 lastchar = sub[shortsub];
1053 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1054 while (s < last) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001055 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001056 if (s == NULL)
1057 return 0;
1058 assert(s < last);
1059 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001060 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001061 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001062 }
1063 return 0;
1064}
1065
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001066static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001067string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001068{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001069 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001070 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001071 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001072 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073 return NULL;
1074 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001075 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001076 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001077 if (v == NULL)
1078 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001079 else {
1080#ifdef COUNT_ALLOCS
1081 one_strings++;
1082#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001083 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001084 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001085 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001086}
1087
Martin v. Löwiscd353062001-05-24 16:56:35 +00001088static PyObject*
1089string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001090{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001091 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001092 Py_ssize_t len_a, len_b;
1093 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001094 PyObject *result;
1095
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001096 /* Make sure both arguments are strings. */
1097 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001098 result = Py_NotImplemented;
1099 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001100 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001101 if (a == b) {
1102 switch (op) {
1103 case Py_EQ:case Py_LE:case Py_GE:
1104 result = Py_True;
1105 goto out;
1106 case Py_NE:case Py_LT:case Py_GT:
1107 result = Py_False;
1108 goto out;
1109 }
1110 }
1111 if (op == Py_EQ) {
1112 /* Supporting Py_NE here as well does not save
1113 much time, since Py_NE is rarely used. */
1114 if (a->ob_size == b->ob_size
1115 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001116 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117 a->ob_size) == 0)) {
1118 result = Py_True;
1119 } else {
1120 result = Py_False;
1121 }
1122 goto out;
1123 }
1124 len_a = a->ob_size; len_b = b->ob_size;
1125 min_len = (len_a < len_b) ? len_a : len_b;
1126 if (min_len > 0) {
1127 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1128 if (c==0)
1129 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1130 }else
1131 c = 0;
1132 if (c == 0)
1133 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1134 switch (op) {
1135 case Py_LT: c = c < 0; break;
1136 case Py_LE: c = c <= 0; break;
1137 case Py_EQ: assert(0); break; /* unreachable */
1138 case Py_NE: c = c != 0; break;
1139 case Py_GT: c = c > 0; break;
1140 case Py_GE: c = c >= 0; break;
1141 default:
1142 result = Py_NotImplemented;
1143 goto out;
1144 }
1145 result = c ? Py_True : Py_False;
1146 out:
1147 Py_INCREF(result);
1148 return result;
1149}
1150
1151int
1152_PyString_Eq(PyObject *o1, PyObject *o2)
1153{
1154 PyStringObject *a, *b;
1155 a = (PyStringObject*)o1;
1156 b = (PyStringObject*)o2;
1157 return a->ob_size == b->ob_size
1158 && *a->ob_sval == *b->ob_sval
1159 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001160}
1161
Guido van Rossum9bfef441993-03-29 10:43:31 +00001162static long
Fred Drakeba096332000-07-09 07:04:36 +00001163string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001164{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001165 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001166 register unsigned char *p;
1167 register long x;
1168
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001169 if (a->ob_shash != -1)
1170 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001171 len = a->ob_size;
1172 p = (unsigned char *) a->ob_sval;
1173 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001174 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001175 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001176 x ^= a->ob_size;
1177 if (x == -1)
1178 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001179 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180 return x;
1181}
1182
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001183#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1184
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001185static PyObject*
1186string_subscript(PyStringObject* self, PyObject* item)
1187{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001188 PyNumberMethods *nb = item->ob_type->tp_as_number;
1189 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1190 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001191 if (i == -1 && PyErr_Occurred())
1192 return NULL;
1193 if (i < 0)
1194 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001195 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001196 }
1197 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001198 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001199 char* source_buf;
1200 char* result_buf;
1201 PyObject* result;
1202
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001203 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001204 PyString_GET_SIZE(self),
1205 &start, &stop, &step, &slicelength) < 0) {
1206 return NULL;
1207 }
1208
1209 if (slicelength <= 0) {
1210 return PyString_FromStringAndSize("", 0);
1211 }
1212 else {
1213 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001214 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001215 if (result_buf == NULL)
1216 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001218 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 cur += step, i++) {
1220 result_buf[i] = source_buf[cur];
1221 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001222
1223 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001224 slicelength);
1225 PyMem_Free(result_buf);
1226 return result;
1227 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001228 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001229 else {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001230 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001231 "string indices must be integers");
1232 return NULL;
1233 }
1234}
1235
Martin v. Löwis18e16552006-02-15 17:27:45 +00001236static Py_ssize_t
1237string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001238{
1239 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001240 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001241 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001242 return -1;
1243 }
1244 *ptr = (void *)self->ob_sval;
1245 return self->ob_size;
1246}
1247
Martin v. Löwis18e16552006-02-15 17:27:45 +00001248static Py_ssize_t
1249string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001250{
Guido van Rossum045e6881997-09-08 18:30:11 +00001251 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001252 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001253 return -1;
1254}
1255
Martin v. Löwis18e16552006-02-15 17:27:45 +00001256static Py_ssize_t
1257string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258{
1259 if ( lenp )
1260 *lenp = self->ob_size;
1261 return 1;
1262}
1263
Martin v. Löwis18e16552006-02-15 17:27:45 +00001264static Py_ssize_t
1265string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001266{
1267 if ( index != 0 ) {
1268 PyErr_SetString(PyExc_SystemError,
1269 "accessing non-existent string segment");
1270 return -1;
1271 }
1272 *ptr = self->ob_sval;
1273 return self->ob_size;
1274}
1275
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001276static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001277 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001278 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001279 (ssizeargfunc)string_repeat, /*sq_repeat*/
1280 (ssizeargfunc)string_item, /*sq_item*/
1281 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001282 0, /*sq_ass_item*/
1283 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001284 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001285};
1286
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001287static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001288 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001289 (binaryfunc)string_subscript,
1290 0,
1291};
1292
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001293static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001294 (readbufferproc)string_buffer_getreadbuf,
1295 (writebufferproc)string_buffer_getwritebuf,
1296 (segcountproc)string_buffer_getsegcount,
1297 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001298};
1299
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300
1301
/* Strip modes; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001310
/* Helpers for the split functions.  Both build the substring
 * data[left:right] and add it to `list`: SPLIT_APPEND at the end,
 * SPLIT_INSERT at the front.
 *
 * They rely on the enclosing function declaring `PyObject *str` and
 * `PyObject *list` and providing an `onError` label, which is jumped to
 * when creating the substring or adding it to the list fails.
 *
 * Each body is wrapped in do { } while (0) so that an invocation followed
 * by a semicolon is exactly one statement, even inside an unbraced
 * if/else.
 */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)

#define SPLIT_INSERT(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Insert(list, 0, str)) {                      \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334
1335static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001336split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001338 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001339 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340 PyObject *list = PyList_New(0);
1341
1342 if (list == NULL)
1343 return NULL;
1344
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 for (i = j = 0; i < len; ) {
1346 while (i < len && isspace(Py_CHARMASK(s[i])))
1347 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 while (i < len && !isspace(Py_CHARMASK(s[i])))
1350 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001351 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001352 if (maxsplit-- <= 0)
1353 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001354 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 while (i < len && isspace(Py_CHARMASK(s[i])))
1356 i++;
1357 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 }
1359 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001360 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001361 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001362 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001364 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365 Py_DECREF(list);
1366 return NULL;
1367}
1368
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001369static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001370split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001371{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001372 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001373 PyObject *str;
1374 PyObject *list = PyList_New(0);
1375
1376 if (list == NULL)
1377 return NULL;
1378
1379 for (i = j = 0; i < len; ) {
1380 if (s[i] == ch) {
1381 if (maxcount-- <= 0)
1382 break;
1383 SPLIT_APPEND(s, j, i);
1384 i = j = i + 1;
1385 } else
1386 i++;
1387 }
1388 if (j <= len) {
1389 SPLIT_APPEND(s, j, len);
1390 }
1391 return list;
1392
1393 onError:
1394 Py_DECREF(list);
1395 return NULL;
1396}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001397
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001398PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399"S.split([sep [,maxsplit]]) -> list of strings\n\
1400\n\
1401Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001402delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001403splits are done. If sep is not specified or is None, any\n\
1404whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405
1406static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001407string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001409 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1410 int err;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001411 Py_ssize_t maxsplit = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001412 const char *s = PyString_AS_STRING(self), *sub;
1413 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001415 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001417 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001418 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001419 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001421 if (PyString_Check(subobj)) {
1422 sub = PyString_AS_STRING(subobj);
1423 n = PyString_GET_SIZE(subobj);
1424 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001425#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001426 else if (PyUnicode_Check(subobj))
1427 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001428#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001429 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1430 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001431
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432 if (n == 0) {
1433 PyErr_SetString(PyExc_ValueError, "empty separator");
1434 return NULL;
1435 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 else if (n == 1)
1437 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438
1439 list = PyList_New(0);
1440 if (list == NULL)
1441 return NULL;
1442
1443 i = j = 0;
1444 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001445 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001446 if (maxsplit-- <= 0)
1447 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001448 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449 if (item == NULL)
1450 goto fail;
1451 err = PyList_Append(list, item);
1452 Py_DECREF(item);
1453 if (err < 0)
1454 goto fail;
1455 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456 }
1457 else
1458 i++;
1459 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001460 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461 if (item == NULL)
1462 goto fail;
1463 err = PyList_Append(list, item);
1464 Py_DECREF(item);
1465 if (err < 0)
1466 goto fail;
1467
1468 return list;
1469
1470 fail:
1471 Py_DECREF(list);
1472 return NULL;
1473}
1474
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001475static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001476rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001477{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001478 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001479 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001480 PyObject *list = PyList_New(0);
1481
1482 if (list == NULL)
1483 return NULL;
1484
1485 for (i = j = len - 1; i >= 0; ) {
1486 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1487 i--;
1488 j = i;
1489 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1490 i--;
1491 if (j > i) {
1492 if (maxsplit-- <= 0)
1493 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001494 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001495 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1496 i--;
1497 j = i;
1498 }
1499 }
1500 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001501 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001502 }
1503 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001504 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001505 Py_DECREF(list);
1506 return NULL;
1507}
1508
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001509static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001510rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001511{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001512 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001513 PyObject *str;
1514 PyObject *list = PyList_New(0);
1515
1516 if (list == NULL)
1517 return NULL;
1518
1519 for (i = j = len - 1; i >= 0; ) {
1520 if (s[i] == ch) {
1521 if (maxcount-- <= 0)
1522 break;
1523 SPLIT_INSERT(s, i + 1, j + 1);
1524 j = i = i - 1;
1525 } else
1526 i--;
1527 }
1528 if (j >= -1) {
1529 SPLIT_INSERT(s, 0, j + 1);
1530 }
1531 return list;
1532
1533 onError:
1534 Py_DECREF(list);
1535 return NULL;
1536}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001537
1538PyDoc_STRVAR(rsplit__doc__,
1539"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1540\n\
1541Return a list of the words in the string S, using sep as the\n\
1542delimiter string, starting at the end of the string and working\n\
1543to the front. If maxsplit is given, at most maxsplit splits are\n\
1544done. If sep is not specified or is None, any whitespace string\n\
1545is a separator.");
1546
1547static PyObject *
1548string_rsplit(PyStringObject *self, PyObject *args)
1549{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001550 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1551 int err;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001552 Py_ssize_t maxsplit = -1;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001553 const char *s = PyString_AS_STRING(self), *sub;
1554 PyObject *list, *item, *subobj = Py_None;
1555
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001556 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001557 return NULL;
1558 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001559 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001560 if (subobj == Py_None)
1561 return rsplit_whitespace(s, len, maxsplit);
1562 if (PyString_Check(subobj)) {
1563 sub = PyString_AS_STRING(subobj);
1564 n = PyString_GET_SIZE(subobj);
1565 }
1566#ifdef Py_USING_UNICODE
1567 else if (PyUnicode_Check(subobj))
1568 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1569#endif
1570 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1571 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001572
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001573 if (n == 0) {
1574 PyErr_SetString(PyExc_ValueError, "empty separator");
1575 return NULL;
1576 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001577 else if (n == 1)
1578 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001579
1580 list = PyList_New(0);
1581 if (list == NULL)
1582 return NULL;
1583
1584 j = len;
1585 i = j - n;
1586 while (i >= 0) {
1587 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1588 if (maxsplit-- <= 0)
1589 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001590 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001591 if (item == NULL)
1592 goto fail;
1593 err = PyList_Insert(list, 0, item);
1594 Py_DECREF(item);
1595 if (err < 0)
1596 goto fail;
1597 j = i;
1598 i -= n;
1599 }
1600 else
1601 i--;
1602 }
1603 item = PyString_FromStringAndSize(s, j);
1604 if (item == NULL)
1605 goto fail;
1606 err = PyList_Insert(list, 0, item);
1607 Py_DECREF(item);
1608 if (err < 0)
1609 goto fail;
1610
1611 return list;
1612
1613 fail:
1614 Py_DECREF(list);
1615 return NULL;
1616}
1617
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001619PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620"S.join(sequence) -> string\n\
1621\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001623sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624
1625static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001626string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627{
1628 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001629 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001632 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001633 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001634 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001635 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636
Tim Peters19fe14e2001-01-19 03:03:47 +00001637 seq = PySequence_Fast(orig, "");
1638 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001639 return NULL;
1640 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001641
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001642 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001643 if (seqlen == 0) {
1644 Py_DECREF(seq);
1645 return PyString_FromString("");
1646 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001648 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001649 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1650 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001651 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001652 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001653 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001655
Raymond Hettinger674f2412004-08-23 23:23:54 +00001656 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001657 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001658 * Do a pre-pass to figure out the total amount of space we'll
1659 * need (sz), see whether any argument is absurd, and defer to
1660 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001661 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001662 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001663 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001664 item = PySequence_Fast_GET_ITEM(seq, i);
1665 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001666#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001667 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001668 /* Defer to Unicode join.
1669 * CAUTION: There's no gurantee that the
1670 * original sequence can be iterated over
1671 * again, so we must pass seq here.
1672 */
1673 PyObject *result;
1674 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001675 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001676 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001677 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001678#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001679 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001680 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001681 " %.80s found",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001682 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001683 Py_DECREF(seq);
1684 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001685 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001686 sz += PyString_GET_SIZE(item);
1687 if (i != 0)
1688 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001689 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001690 PyErr_SetString(PyExc_OverflowError,
1691 "join() is too long for a Python string");
1692 Py_DECREF(seq);
1693 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001695 }
1696
1697 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001698 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001699 if (res == NULL) {
1700 Py_DECREF(seq);
1701 return NULL;
1702 }
1703
1704 /* Catenate everything. */
1705 p = PyString_AS_STRING(res);
1706 for (i = 0; i < seqlen; ++i) {
1707 size_t n;
1708 item = PySequence_Fast_GET_ITEM(seq, i);
1709 n = PyString_GET_SIZE(item);
1710 memcpy(p, PyString_AS_STRING(item), n);
1711 p += n;
1712 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001713 memcpy(p, sep, seplen);
1714 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001715 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001716 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001717
Jeremy Hylton49048292000-07-11 03:28:17 +00001718 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720}
1721
Tim Peters52e155e2001-06-16 05:42:57 +00001722PyObject *
1723_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001724{
Tim Petersa7259592001-06-16 05:11:17 +00001725 assert(sep != NULL && PyString_Check(sep));
1726 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001727 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001728}
1729
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001730static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001731string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001732{
1733 if (*end > len)
1734 *end = len;
1735 else if (*end < 0)
1736 *end += len;
1737 if (*end < 0)
1738 *end = 0;
1739 if (*start < 0)
1740 *start += len;
1741 if (*start < 0)
1742 *start = 0;
1743}
1744
Martin v. Löwis18e16552006-02-15 17:27:45 +00001745static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001746string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001749 Py_ssize_t len = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001750 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001751 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752
Martin v. Löwis18e16552006-02-15 17:27:45 +00001753 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001754 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001755 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 return -2;
1757 if (PyString_Check(subobj)) {
1758 sub = PyString_AS_STRING(subobj);
1759 n = PyString_GET_SIZE(subobj);
1760 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001761#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001762 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001763 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001764#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001765 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766 return -2;
1767
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001768 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769
Guido van Rossum4c08d552000-03-10 22:55:18 +00001770 if (dir > 0) {
1771 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001773 last -= n;
1774 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001775 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001776 return (long)i;
1777 }
1778 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001779 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001780
Guido van Rossum4c08d552000-03-10 22:55:18 +00001781 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001782 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001783 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001784 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001785 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001786 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001787
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788 return -1;
1789}
1790
1791
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001792PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793"S.find(sub [,start [,end]]) -> int\n\
1794\n\
1795Return the lowest index in S where substring sub is found,\n\
1796such that sub is contained within s[start,end]. Optional\n\
1797arguments start and end are interpreted as in slice notation.\n\
1798\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001799Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800
1801static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001802string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001804 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805 if (result == -2)
1806 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001807 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808}
1809
1810
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001811PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812"S.index(sub [,start [,end]]) -> int\n\
1813\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001814Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815
1816static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001817string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001819 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820 if (result == -2)
1821 return NULL;
1822 if (result == -1) {
1823 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001824 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825 return NULL;
1826 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001827 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001828}
1829
1830
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001831PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832"S.rfind(sub [,start [,end]]) -> int\n\
1833\n\
1834Return the highest index in S where substring sub is found,\n\
1835such that sub is contained within s[start,end]. Optional\n\
1836arguments start and end are interpreted as in slice notation.\n\
1837\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001838Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839
1840static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001841string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001843 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844 if (result == -2)
1845 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001846 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847}
1848
1849
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001850PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851"S.rindex(sub [,start [,end]]) -> int\n\
1852\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001853Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001854
1855static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001856string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001858 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001859 if (result == -2)
1860 return NULL;
1861 if (result == -1) {
1862 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001863 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864 return NULL;
1865 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001866 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867}
1868
1869
1870static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001871do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1872{
1873 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001874 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001875 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001876 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1877 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001878
1879 i = 0;
1880 if (striptype != RIGHTSTRIP) {
1881 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1882 i++;
1883 }
1884 }
1885
1886 j = len;
1887 if (striptype != LEFTSTRIP) {
1888 do {
1889 j--;
1890 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1891 j++;
1892 }
1893
1894 if (i == 0 && j == len && PyString_CheckExact(self)) {
1895 Py_INCREF(self);
1896 return (PyObject*)self;
1897 }
1898 else
1899 return PyString_FromStringAndSize(s+i, j-i);
1900}
1901
1902
1903static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001904do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905{
1906 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909 i = 0;
1910 if (striptype != RIGHTSTRIP) {
1911 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1912 i++;
1913 }
1914 }
1915
1916 j = len;
1917 if (striptype != LEFTSTRIP) {
1918 do {
1919 j--;
1920 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1921 j++;
1922 }
1923
Tim Peters8fa5dd02001-09-12 02:18:30 +00001924 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 Py_INCREF(self);
1926 return (PyObject*)self;
1927 }
1928 else
1929 return PyString_FromStringAndSize(s+i, j-i);
1930}
1931
1932
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001933static PyObject *
1934do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1935{
1936 PyObject *sep = NULL;
1937
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001938 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001939 return NULL;
1940
1941 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001942 if (PyString_Check(sep))
1943 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001944#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001945 else if (PyUnicode_Check(sep)) {
1946 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1947 PyObject *res;
1948 if (uniself==NULL)
1949 return NULL;
1950 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1951 striptype, sep);
1952 Py_DECREF(uniself);
1953 return res;
1954 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001955#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001956 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001957#ifdef Py_USING_UNICODE
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001958 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001959#else
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001960 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001961#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001962 STRIPNAME(striptype));
1963 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001964 }
1965
1966 return do_strip(self, striptype);
1967}
1968
1969
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001970PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001971"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972\n\
1973Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001974whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001975If chars is given and not None, remove characters in chars instead.\n\
1976If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977
1978static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001979string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001981 if (PyTuple_GET_SIZE(args) == 0)
1982 return do_strip(self, BOTHSTRIP); /* Common case */
1983 else
1984 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985}
1986
1987
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001988PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001989"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001991Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001992If chars is given and not None, remove characters in chars instead.\n\
1993If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994
1995static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001996string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001998 if (PyTuple_GET_SIZE(args) == 0)
1999 return do_strip(self, LEFTSTRIP); /* Common case */
2000 else
2001 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002}
2003
2004
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002005PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002006"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002008Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002009If chars is given and not None, remove characters in chars instead.\n\
2010If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011
2012static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002013string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002015 if (PyTuple_GET_SIZE(args) == 0)
2016 return do_strip(self, RIGHTSTRIP); /* Common case */
2017 else
2018 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019}
2020
2021
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002022PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023"S.lower() -> string\n\
2024\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002025Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026
2027static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002028string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029{
2030 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002031 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002032 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002034 newobj = PyString_FromStringAndSize(NULL, n);
2035 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002037 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002038 for (i = 0; i < n; i++) {
2039 int c = Py_CHARMASK(*s++);
2040 if (isupper(c)) {
2041 *s_new = tolower(c);
2042 } else
2043 *s_new = c;
2044 s_new++;
2045 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002046 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047}
2048
2049
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002050PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051"S.upper() -> string\n\
2052\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002053Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002054
2055static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002056string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002057{
2058 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002059 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002060 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002062 newobj = PyString_FromStringAndSize(NULL, n);
2063 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002065 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066 for (i = 0; i < n; i++) {
2067 int c = Py_CHARMASK(*s++);
2068 if (islower(c)) {
2069 *s_new = toupper(c);
2070 } else
2071 *s_new = c;
2072 s_new++;
2073 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002074 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075}
2076
2077
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002078PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002079"S.title() -> string\n\
2080\n\
2081Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002082characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002083
2084static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002085string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002086{
2087 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002088 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002090 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002091
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002092 newobj = PyString_FromStringAndSize(NULL, n);
2093 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002094 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002095 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096 for (i = 0; i < n; i++) {
2097 int c = Py_CHARMASK(*s++);
2098 if (islower(c)) {
2099 if (!previous_is_cased)
2100 c = toupper(c);
2101 previous_is_cased = 1;
2102 } else if (isupper(c)) {
2103 if (previous_is_cased)
2104 c = tolower(c);
2105 previous_is_cased = 1;
2106 } else
2107 previous_is_cased = 0;
2108 *s_new++ = c;
2109 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002110 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111}
2112
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002113PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114"S.capitalize() -> string\n\
2115\n\
2116Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002117capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118
2119static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002120string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121{
2122 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002123 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002124 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002126 newobj = PyString_FromStringAndSize(NULL, n);
2127 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002129 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130 if (0 < n) {
2131 int c = Py_CHARMASK(*s++);
2132 if (islower(c))
2133 *s_new = toupper(c);
2134 else
2135 *s_new = c;
2136 s_new++;
2137 }
2138 for (i = 1; i < n; i++) {
2139 int c = Py_CHARMASK(*s++);
2140 if (isupper(c))
2141 *s_new = tolower(c);
2142 else
2143 *s_new = c;
2144 s_new++;
2145 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002146 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147}
2148
2149
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002150PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151"S.count(sub[, start[, end]]) -> int\n\
2152\n\
2153Return the number of occurrences of substring sub in string\n\
2154S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002155interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156
2157static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002158string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002160 const char *s = PyString_AS_STRING(self), *sub, *t;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002161 Py_ssize_t len = PyString_GET_SIZE(self), n;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002162 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002163 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002164 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165
Guido van Rossumc6821402000-05-08 14:08:05 +00002166 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2167 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002169
Guido van Rossum4c08d552000-03-10 22:55:18 +00002170 if (PyString_Check(subobj)) {
2171 sub = PyString_AS_STRING(subobj);
2172 n = PyString_GET_SIZE(subobj);
2173 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002174#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002175 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002176 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002177 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2178 if (count == -1)
2179 return NULL;
2180 else
2181 return PyInt_FromLong((long) count);
2182 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002183#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2185 return NULL;
2186
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002187 string_adjust_indices(&i, &last, len);
2188
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189 m = last + 1 - n;
2190 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002191 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192
2193 r = 0;
2194 while (i < m) {
2195 if (!memcmp(s+i, sub, n)) {
2196 r++;
2197 i += n;
2198 } else {
2199 i++;
2200 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002201 if (i >= m)
2202 break;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002203 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002204 if (t == NULL)
2205 break;
2206 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002208 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209}
2210
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002211PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212"S.swapcase() -> string\n\
2213\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002214Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002215converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002216
2217static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002218string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219{
2220 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002221 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002222 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002224 newobj = PyString_FromStringAndSize(NULL, n);
2225 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002227 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228 for (i = 0; i < n; i++) {
2229 int c = Py_CHARMASK(*s++);
2230 if (islower(c)) {
2231 *s_new = toupper(c);
2232 }
2233 else if (isupper(c)) {
2234 *s_new = tolower(c);
2235 }
2236 else
2237 *s_new = c;
2238 s_new++;
2239 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002240 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241}
2242
2243
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002244PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245"S.translate(table [,deletechars]) -> string\n\
2246\n\
2247Return a copy of the string S, where all characters occurring\n\
2248in the optional argument deletechars are removed, and the\n\
2249remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002250translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251
2252static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002253string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002255 register char *input, *output;
2256 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002257 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002260 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261 PyObject *result;
2262 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002263 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002265 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268
2269 if (PyString_Check(tableobj)) {
2270 table1 = PyString_AS_STRING(tableobj);
2271 tablen = PyString_GET_SIZE(tableobj);
2272 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002273#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002275 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276 parameter; instead a mapping to None will cause characters
2277 to be deleted. */
2278 if (delobj != NULL) {
2279 PyErr_SetString(PyExc_TypeError,
2280 "deletions are implemented differently for unicode");
2281 return NULL;
2282 }
2283 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2284 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002285#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002288
Martin v. Löwis00b61272002-12-12 20:03:19 +00002289 if (tablen != 256) {
2290 PyErr_SetString(PyExc_ValueError,
2291 "translation table must be 256 characters long");
2292 return NULL;
2293 }
2294
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 if (delobj != NULL) {
2296 if (PyString_Check(delobj)) {
2297 del_table = PyString_AS_STRING(delobj);
2298 dellen = PyString_GET_SIZE(delobj);
2299 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002300#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301 else if (PyUnicode_Check(delobj)) {
2302 PyErr_SetString(PyExc_TypeError,
2303 "deletions are implemented differently for unicode");
2304 return NULL;
2305 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002306#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2308 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 }
2310 else {
2311 del_table = NULL;
2312 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002313 }
2314
2315 table = table1;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002316 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317 result = PyString_FromStringAndSize((char *)NULL, inlen);
2318 if (result == NULL)
2319 return NULL;
2320 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002321 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002322
2323 if (dellen == 0) {
2324 /* If no deletions are required, use faster code */
2325 for (i = inlen; --i >= 0; ) {
2326 c = Py_CHARMASK(*input++);
2327 if (Py_CHARMASK((*output++ = table[c])) != c)
2328 changed = 1;
2329 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002330 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002331 return result;
2332 Py_DECREF(result);
2333 Py_INCREF(input_obj);
2334 return input_obj;
2335 }
2336
2337 for (i = 0; i < 256; i++)
2338 trans_table[i] = Py_CHARMASK(table[i]);
2339
2340 for (i = 0; i < dellen; i++)
2341 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2342
2343 for (i = inlen; --i >= 0; ) {
2344 c = Py_CHARMASK(*input++);
2345 if (trans_table[c] != -1)
2346 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2347 continue;
2348 changed = 1;
2349 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002350 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351 Py_DECREF(result);
2352 Py_INCREF(input_obj);
2353 return input_obj;
2354 }
2355 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002356 if (inlen > 0)
2357 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358 return result;
2359}
2360
2361
2362/* What follows is used for implementing replace(). Perry Stoll. */
2363
2364/*
2365 mymemfind
2366
2367 strstr replacement for arbitrary blocks of memory.
2368
Barry Warsaw51ac5802000-03-20 16:36:48 +00002369 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370 contents of memory pointed to by PAT. Returns the index into MEM if
2371 found, or -1 if not found. If len of PAT is greater than length of
2372 MEM, the function returns -1.
2373*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00002374static Py_ssize_t
2375mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002377 register Py_ssize_t ii;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378
2379 /* pattern can not occur in the last pat_len-1 chars */
2380 len -= pat_len;
2381
2382 for (ii = 0; ii <= len; ii++) {
Fred Drake396f6e02000-06-20 15:47:54 +00002383 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002384 return ii;
2385 }
2386 }
2387 return -1;
2388}
2389
2390/*
2391 mymemcnt
2392
2393 Return the number of distinct times PAT is found in MEM.
2394 meaning mem=1111 and pat==11 returns 2.
2395 mem=11111 and pat==11 also return 2.
2396 */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002397static Py_ssize_t
2398mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002399{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002400 register Py_ssize_t offset = 0;
2401 Py_ssize_t nfound = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002402
2403 while (len >= 0) {
2404 offset = mymemfind(mem, len, pat, pat_len);
2405 if (offset == -1)
2406 break;
2407 mem += offset + pat_len;
2408 len -= offset + pat_len;
2409 nfound++;
2410 }
2411 return nfound;
2412}
2413
2414/*
2415 mymemreplace
2416
Thomas Wouters7e474022000-07-16 12:04:32 +00002417 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418 replaced with SUB.
2419
Thomas Wouters7e474022000-07-16 12:04:32 +00002420 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421 of PAT in STR, then the original string is returned. Otherwise, a new
2422 string is allocated here and returned.
2423
2424 on return, out_len is:
2425 the length of output string, or
2426 -1 if the input string is returned, or
2427 unchanged if an error occurs (no memory).
2428
2429 return value is:
2430 the new string allocated locally, or
2431 NULL if an error occurred.
2432*/
2433static char *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002434mymemreplace(const char *str, Py_ssize_t len, /* input string */
2435 const char *pat, Py_ssize_t pat_len, /* pattern string to find */
2436 const char *sub, Py_ssize_t sub_len, /* substitution string */
2437 Py_ssize_t count, /* number of replacements */
2438 Py_ssize_t *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439{
2440 char *out_s;
2441 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002442 Py_ssize_t nfound, offset, new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002443
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002444 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002445 goto return_same;
2446
2447 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002448 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002449 if (count < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002450 count = PY_SSIZE_T_MAX;
Tim Peters9c012af2001-05-10 00:32:57 +00002451 else if (nfound > count)
2452 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002453 if (nfound == 0)
2454 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002455
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002456 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002457 if (new_len == 0) {
2458 /* Have to allocate something for the caller to free(). */
2459 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002460 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002461 return NULL;
2462 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002464 else {
2465 assert(new_len > 0);
2466 new_s = (char *)PyMem_MALLOC(new_len);
2467 if (new_s == NULL)
2468 return NULL;
2469 out_s = new_s;
2470
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002471 if (pat_len > 0) {
2472 for (; nfound > 0; --nfound) {
2473 /* find index of next instance of pattern */
2474 offset = mymemfind(str, len, pat, pat_len);
2475 if (offset == -1)
2476 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002477
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002478 /* copy non matching part of input string */
2479 memcpy(new_s, str, offset);
2480 str += offset + pat_len;
2481 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002482
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002483 /* copy substitute into the output string */
2484 new_s += offset;
2485 memcpy(new_s, sub, sub_len);
2486 new_s += sub_len;
2487 }
2488 /* copy any remaining values into output string */
2489 if (len > 0)
2490 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002491 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002492 else {
2493 for (;;++str, --len) {
2494 memcpy(new_s, sub, sub_len);
2495 new_s += sub_len;
2496 if (--nfound <= 0) {
2497 memcpy(new_s, str, len);
2498 break;
2499 }
2500 *new_s++ = *str;
2501 }
2502 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002503 }
2504 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002505 return out_s;
2506
2507 return_same:
2508 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002509 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002510}
2511
2512
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002513PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002514"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002515\n\
2516Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002517old replaced by new. If the optional argument count is\n\
2518given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002519
2520static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002521string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002522{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002523 const char *str = PyString_AS_STRING(self), *sub, *repl;
2524 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002525 const Py_ssize_t len = PyString_GET_SIZE(self);
2526 Py_ssize_t sub_len, repl_len, out_len;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002527 Py_ssize_t count = -1;
2528 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002530
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002531 if (!PyArg_ParseTuple(args, "OO|n:replace",
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002533 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002534
2535 if (PyString_Check(subobj)) {
2536 sub = PyString_AS_STRING(subobj);
2537 sub_len = PyString_GET_SIZE(subobj);
2538 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002539#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002540 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002541 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002543#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2545 return NULL;
2546
2547 if (PyString_Check(replobj)) {
2548 repl = PyString_AS_STRING(replobj);
2549 repl_len = PyString_GET_SIZE(replobj);
2550 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002551#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002552 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002553 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002554 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002555#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002556 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2557 return NULL;
2558
Guido van Rossum4c08d552000-03-10 22:55:18 +00002559 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002560 if (new_s == NULL) {
2561 PyErr_NoMemory();
2562 return NULL;
2563 }
2564 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002565 if (PyString_CheckExact(self)) {
2566 /* we're returning another reference to self */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002567 newobj = (PyObject*)self;
2568 Py_INCREF(newobj);
Tim Peters8fa5dd02001-09-12 02:18:30 +00002569 }
2570 else {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002571 newobj = PyString_FromStringAndSize(str, len);
2572 if (newobj == NULL)
Tim Peters8fa5dd02001-09-12 02:18:30 +00002573 return NULL;
2574 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002575 }
2576 else {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002577 newobj = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002578 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002579 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002580 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002581}
2582
2583
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002584PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002585"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002586\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002587Return True if S starts with the specified prefix, False otherwise.\n\
2588With optional start, test S beginning at that position.\n\
2589With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002590
2591static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002592string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002593{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002595 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002596 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002597 Py_ssize_t plen;
2598 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002599 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002600 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002601
Guido van Rossumc6821402000-05-08 14:08:05 +00002602 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2603 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 return NULL;
2605 if (PyString_Check(subobj)) {
2606 prefix = PyString_AS_STRING(subobj);
2607 plen = PyString_GET_SIZE(subobj);
2608 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002609#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002610 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002611 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002612 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002613 subobj, start, end, -1);
2614 if (rc == -1)
2615 return NULL;
2616 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002617 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002618 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002619#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002621 return NULL;
2622
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002623 string_adjust_indices(&start, &end, len);
2624
2625 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002626 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002627
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002628 if (end-start >= plen)
2629 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2630 else
2631 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002632}
2633
2634
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002635PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002636"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002637\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002638Return True if S ends with the specified suffix, False otherwise.\n\
2639With optional start, test S beginning at that position.\n\
2640With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002641
2642static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002643string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002644{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002645 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002646 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002647 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002648 Py_ssize_t slen;
2649 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002650 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002651 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002652
Guido van Rossumc6821402000-05-08 14:08:05 +00002653 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2654 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655 return NULL;
2656 if (PyString_Check(subobj)) {
2657 suffix = PyString_AS_STRING(subobj);
2658 slen = PyString_GET_SIZE(subobj);
2659 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002660#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002661 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002662 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002663 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002664 subobj, start, end, +1);
2665 if (rc == -1)
2666 return NULL;
2667 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002668 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002669 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002670#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002671 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002672 return NULL;
2673
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002674 string_adjust_indices(&start, &end, len);
2675
2676 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002677 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002678
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002679 if (end-slen > start)
2680 start = end - slen;
2681 if (end-start >= slen)
2682 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2683 else
2684 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002685}
2686
2687
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002688PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002689"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002690\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002691Encodes S using the codec registered for encoding. encoding defaults\n\
2692to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002693handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002694a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2695'xmlcharrefreplace' as well as any other name registered with\n\
2696codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002697
2698static PyObject *
2699string_encode(PyStringObject *self, PyObject *args)
2700{
2701 char *encoding = NULL;
2702 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002703 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002704
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002705 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2706 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002707 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002708 if (v == NULL)
2709 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002710 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2711 PyErr_Format(PyExc_TypeError,
2712 "encoder did not return a string/unicode object "
2713 "(type=%.400s)",
2714 v->ob_type->tp_name);
2715 Py_DECREF(v);
2716 return NULL;
2717 }
2718 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002719
2720 onError:
2721 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002722}
2723
2724
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002725PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002726"S.decode([encoding[,errors]]) -> object\n\
2727\n\
2728Decodes S using the codec registered for encoding. encoding defaults\n\
2729to the default encoding. errors may be given to set a different error\n\
2730handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002731a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2732as well as any other name registerd with codecs.register_error that is\n\
2733able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002734
2735static PyObject *
2736string_decode(PyStringObject *self, PyObject *args)
2737{
2738 char *encoding = NULL;
2739 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002740 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002741
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002742 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2743 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002744 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002745 if (v == NULL)
2746 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002747 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2748 PyErr_Format(PyExc_TypeError,
2749 "decoder did not return a string/unicode object "
2750 "(type=%.400s)",
2751 v->ob_type->tp_name);
2752 Py_DECREF(v);
2753 return NULL;
2754 }
2755 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002756
2757 onError:
2758 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002759}
2760
2761
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002762PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002763"S.expandtabs([tabsize]) -> string\n\
2764\n\
2765Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002766If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002767
2768static PyObject*
2769string_expandtabs(PyStringObject *self, PyObject *args)
2770{
2771 const char *e, *p;
2772 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002773 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002774 PyObject *u;
2775 int tabsize = 8;
2776
2777 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2778 return NULL;
2779
Thomas Wouters7e474022000-07-16 12:04:32 +00002780 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002781 i = j = 0;
2782 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2783 for (p = PyString_AS_STRING(self); p < e; p++)
2784 if (*p == '\t') {
2785 if (tabsize > 0)
2786 j += tabsize - (j % tabsize);
2787 }
2788 else {
2789 j++;
2790 if (*p == '\n' || *p == '\r') {
2791 i += j;
2792 j = 0;
2793 }
2794 }
2795
2796 /* Second pass: create output string and fill it */
2797 u = PyString_FromStringAndSize(NULL, i + j);
2798 if (!u)
2799 return NULL;
2800
2801 j = 0;
2802 q = PyString_AS_STRING(u);
2803
2804 for (p = PyString_AS_STRING(self); p < e; p++)
2805 if (*p == '\t') {
2806 if (tabsize > 0) {
2807 i = tabsize - (j % tabsize);
2808 j += i;
2809 while (i--)
2810 *q++ = ' ';
2811 }
2812 }
2813 else {
2814 j++;
2815 *q++ = *p;
2816 if (*p == '\n' || *p == '\r')
2817 j = 0;
2818 }
2819
2820 return u;
2821}
2822
Tim Peters8fa5dd02001-09-12 02:18:30 +00002823static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002824pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002825{
2826 PyObject *u;
2827
2828 if (left < 0)
2829 left = 0;
2830 if (right < 0)
2831 right = 0;
2832
Tim Peters8fa5dd02001-09-12 02:18:30 +00002833 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002834 Py_INCREF(self);
2835 return (PyObject *)self;
2836 }
2837
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002838 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002839 left + PyString_GET_SIZE(self) + right);
2840 if (u) {
2841 if (left)
2842 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002843 memcpy(PyString_AS_STRING(u) + left,
2844 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845 PyString_GET_SIZE(self));
2846 if (right)
2847 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2848 fill, right);
2849 }
2850
2851 return u;
2852}
2853
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002854PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002855"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002856"\n"
2857"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002858"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002859
2860static PyObject *
2861string_ljust(PyStringObject *self, PyObject *args)
2862{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002863 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002864 char fillchar = ' ';
2865
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002866 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867 return NULL;
2868
Tim Peters8fa5dd02001-09-12 02:18:30 +00002869 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002870 Py_INCREF(self);
2871 return (PyObject*) self;
2872 }
2873
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002874 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002875}
2876
2877
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002878PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002879"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002880"\n"
2881"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002882"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002883
2884static PyObject *
2885string_rjust(PyStringObject *self, PyObject *args)
2886{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002887 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002888 char fillchar = ' ';
2889
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002890 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002891 return NULL;
2892
Tim Peters8fa5dd02001-09-12 02:18:30 +00002893 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002894 Py_INCREF(self);
2895 return (PyObject*) self;
2896 }
2897
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002898 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002899}
2900
2901
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002902PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002903"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002904"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002905"Return S centered in a string of length width. Padding is\n"
2906"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002907
2908static PyObject *
2909string_center(PyStringObject *self, PyObject *args)
2910{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002911 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002912 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002913 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002914
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002915 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002916 return NULL;
2917
Tim Peters8fa5dd02001-09-12 02:18:30 +00002918 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002919 Py_INCREF(self);
2920 return (PyObject*) self;
2921 }
2922
2923 marg = width - PyString_GET_SIZE(self);
2924 left = marg / 2 + (marg & width & 1);
2925
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002926 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002927}
2928
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002929PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002930"S.zfill(width) -> string\n"
2931"\n"
2932"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002933"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002934
2935static PyObject *
2936string_zfill(PyStringObject *self, PyObject *args)
2937{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002938 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00002939 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002940 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002941 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00002942
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002943 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00002944 return NULL;
2945
2946 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002947 if (PyString_CheckExact(self)) {
2948 Py_INCREF(self);
2949 return (PyObject*) self;
2950 }
2951 else
2952 return PyString_FromStringAndSize(
2953 PyString_AS_STRING(self),
2954 PyString_GET_SIZE(self)
2955 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002956 }
2957
2958 fill = width - PyString_GET_SIZE(self);
2959
2960 s = pad(self, fill, 0, '0');
2961
2962 if (s == NULL)
2963 return NULL;
2964
2965 p = PyString_AS_STRING(s);
2966 if (p[fill] == '+' || p[fill] == '-') {
2967 /* move sign to beginning of string */
2968 p[0] = p[fill];
2969 p[fill] = '0';
2970 }
2971
2972 return (PyObject*) s;
2973}
2974
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002975PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002976"S.isspace() -> bool\n\
2977\n\
2978Return True if all characters in S are whitespace\n\
2979and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002980
2981static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002982string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002983{
Fred Drakeba096332000-07-09 07:04:36 +00002984 register const unsigned char *p
2985 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002986 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987
Guido van Rossum4c08d552000-03-10 22:55:18 +00002988 /* Shortcut for single character strings */
2989 if (PyString_GET_SIZE(self) == 1 &&
2990 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002991 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002992
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002993 /* Special case for empty strings */
2994 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002995 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002996
Guido van Rossum4c08d552000-03-10 22:55:18 +00002997 e = p + PyString_GET_SIZE(self);
2998 for (; p < e; p++) {
2999 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003000 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003001 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003002 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003003}
3004
3005
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003006PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003007"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003008\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003009Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003010and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003011
3012static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003013string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003014{
Fred Drakeba096332000-07-09 07:04:36 +00003015 register const unsigned char *p
3016 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003017 register const unsigned char *e;
3018
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003019 /* Shortcut for single character strings */
3020 if (PyString_GET_SIZE(self) == 1 &&
3021 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003022 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003023
3024 /* Special case for empty strings */
3025 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003026 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003027
3028 e = p + PyString_GET_SIZE(self);
3029 for (; p < e; p++) {
3030 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003031 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003032 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003033 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003034}
3035
3036
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003037PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003038"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003039\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003040Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003041and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003042
3043static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003044string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003045{
Fred Drakeba096332000-07-09 07:04:36 +00003046 register const unsigned char *p
3047 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003048 register const unsigned char *e;
3049
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003050 /* Shortcut for single character strings */
3051 if (PyString_GET_SIZE(self) == 1 &&
3052 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003053 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003054
3055 /* Special case for empty strings */
3056 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003057 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003058
3059 e = p + PyString_GET_SIZE(self);
3060 for (; p < e; p++) {
3061 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003062 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003063 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003064 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003065}
3066
3067
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003068PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003069"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003071Return True if all characters in S are digits\n\
3072and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003073
3074static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003075string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003076{
Fred Drakeba096332000-07-09 07:04:36 +00003077 register const unsigned char *p
3078 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003079 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080
Guido van Rossum4c08d552000-03-10 22:55:18 +00003081 /* Shortcut for single character strings */
3082 if (PyString_GET_SIZE(self) == 1 &&
3083 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003084 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003085
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003086 /* Special case for empty strings */
3087 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003088 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003089
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090 e = p + PyString_GET_SIZE(self);
3091 for (; p < e; p++) {
3092 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003093 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003094 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003095 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003096}
3097
3098
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003099PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003100"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003102Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003103at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003104
3105static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003106string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003107{
Fred Drakeba096332000-07-09 07:04:36 +00003108 register const unsigned char *p
3109 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003110 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111 int cased;
3112
Guido van Rossum4c08d552000-03-10 22:55:18 +00003113 /* Shortcut for single character strings */
3114 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003115 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003116
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003117 /* Special case for empty strings */
3118 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003119 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003120
Guido van Rossum4c08d552000-03-10 22:55:18 +00003121 e = p + PyString_GET_SIZE(self);
3122 cased = 0;
3123 for (; p < e; p++) {
3124 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003125 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003126 else if (!cased && islower(*p))
3127 cased = 1;
3128 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003129 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003130}
3131
3132
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003133PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003134"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003135\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003136Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003137at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003138
3139static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003140string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003141{
Fred Drakeba096332000-07-09 07:04:36 +00003142 register const unsigned char *p
3143 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003144 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145 int cased;
3146
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147 /* Shortcut for single character strings */
3148 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003149 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003151 /* Special case for empty strings */
3152 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003153 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003154
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 e = p + PyString_GET_SIZE(self);
3156 cased = 0;
3157 for (; p < e; p++) {
3158 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003159 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160 else if (!cased && isupper(*p))
3161 cased = 1;
3162 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003163 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003164}
3165
3166
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003167PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003168"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003170Return True if S is a titlecased string and there is at least one\n\
3171character in S, i.e. uppercase characters may only follow uncased\n\
3172characters and lowercase characters only cased ones. Return False\n\
3173otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003174
3175static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003176string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003177{
Fred Drakeba096332000-07-09 07:04:36 +00003178 register const unsigned char *p
3179 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003180 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003181 int cased, previous_is_cased;
3182
Guido van Rossum4c08d552000-03-10 22:55:18 +00003183 /* Shortcut for single character strings */
3184 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003185 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003187 /* Special case for empty strings */
3188 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003189 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003190
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191 e = p + PyString_GET_SIZE(self);
3192 cased = 0;
3193 previous_is_cased = 0;
3194 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003195 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196
3197 if (isupper(ch)) {
3198 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003199 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003200 previous_is_cased = 1;
3201 cased = 1;
3202 }
3203 else if (islower(ch)) {
3204 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003205 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003206 previous_is_cased = 1;
3207 cased = 1;
3208 }
3209 else
3210 previous_is_cased = 0;
3211 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003212 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003213}
3214
3215
PyDoc_STRVAR(splitlines__doc__,
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.");

/* str.splitlines([keepends]): split S at '\n', '\r' and "\r\n" boundaries
   and return the pieces as a new list.  When `keepends` is non-zero each
   piece keeps its line break.  Returns NULL if argument parsing or list
   creation fails.
   NOTE(review): SPLIT_APPEND is defined outside this view; presumably it
   uses the locals `str` and `list` and jumps to `onError` on failure,
   which is why those names must not change -- confirm against the macro. */
static PyObject*
string_splitlines(PyStringObject *self, PyObject *args)
{
    register Py_ssize_t i;      /* scan position */
    register Py_ssize_t j;      /* start of the line being collected */
    Py_ssize_t len;
    int keepends = 0;
    PyObject *list;
    PyObject *str;              /* not referenced directly in this body */
    char *data;

    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
        return NULL;

    data = PyString_AS_STRING(self);
    len = PyString_GET_SIZE(self);

    list = PyList_New(0);
    if (!list)
        goto onError;

    for (i = j = 0; i < len; ) {
        Py_ssize_t eol;         /* end (exclusive) of the piece to append */

        /* Find a line and append it */
        while (i < len && data[i] != '\n' && data[i] != '\r')
            i++;

        /* Skip the line break reading CRLF as one line break */
        eol = i;
        if (i < len) {
            if (data[i] == '\r' && i + 1 < len &&
                data[i+1] == '\n')
                i += 2;
            else
                i++;
            /* With keepends the piece extends over the line break. */
            if (keepends)
                eol = i;
        }
        SPLIT_APPEND(data, j, eol);
        j = i;
    }
    /* NOTE(review): the loop above leaves j == i >= len, so this branch
       looks unreachable -- confirm before removing. */
    if (j < len) {
        SPLIT_APPEND(data, j, len);
    }

    return list;

 onError:
    /* list may be NULL here when PyList_New() failed. */
    Py_XDECREF(list);
    return NULL;
}
3275
3276#undef SPLIT_APPEND
3277
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003278static PyObject *
3279string_getnewargs(PyStringObject *v)
3280{
3281 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3282}
3283
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003284
/* Method table for the str type (installed through tp_methods of
   PyString_Type).  Each entry gives the Python-level name, the C
   implementation, the calling convention flag and the docstring. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    /* No docstring is supplied for __getnewargs__. */
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
3334
Jeremy Hylton938ace62002-07-17 16:30:39 +00003335static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003336str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3337
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003338static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003339string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003340{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003341 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003342 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003343
Guido van Rossumae960af2001-08-30 03:11:59 +00003344 if (type != &PyString_Type)
3345 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003346 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3347 return NULL;
3348 if (x == NULL)
3349 return PyString_FromString("");
3350 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003351}
3352
Guido van Rossumae960af2001-08-30 03:11:59 +00003353static PyObject *
3354str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3355{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003356 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003357 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003358
3359 assert(PyType_IsSubtype(type, &PyString_Type));
3360 tmp = string_new(&PyString_Type, args, kwds);
3361 if (tmp == NULL)
3362 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003363 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003364 n = PyString_GET_SIZE(tmp);
3365 pnew = type->tp_alloc(type, n);
3366 if (pnew != NULL) {
3367 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003368 ((PyStringObject *)pnew)->ob_shash =
3369 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003370 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003371 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003372 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003373 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003374}
3375
Guido van Rossumcacfc072002-05-24 19:01:59 +00003376static PyObject *
3377basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3378{
3379 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003380 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003381 return NULL;
3382}
3383
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003384static PyObject *
3385string_mod(PyObject *v, PyObject *w)
3386{
3387 if (!PyString_Check(v)) {
3388 Py_INCREF(Py_NotImplemented);
3389 return Py_NotImplemented;
3390 }
3391 return PyString_Format(v, w);
3392}
3393
/* Docstring for the basestring type (used as tp_doc of PyBaseString_Type). */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003396
/* Number protocol table for str (referenced by tp_as_number of
   PyString_Type).  Only nb_remainder is provided; the initializer stops
   there, so every later slot is zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                      /*nb_add*/
    0,                      /*nb_subtract*/
    0,                      /*nb_multiply*/
    string_mod,             /*nb_remainder*/
};
3403
3404
/* Type object for basestring.  It carries no behavior of its own: tp_new
   always raises (basestring_new) and PyString_Type names it as tp_base.
   The unlabeled leading fields follow the standard PyTypeObject layout. */
PyTypeObject PyBaseString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                      /* ob_size */
    "basestring",           /* tp_name */
    0,                      /* tp_basicsize */
    0,                      /* tp_itemsize */
    0,                      /* tp_dealloc */
    0,                      /* tp_print */
    0,                      /* tp_getattr */
    0,                      /* tp_setattr */
    0,                      /* tp_compare */
    0,                      /* tp_repr */
    0,                      /* tp_as_number */
    0,                      /* tp_as_sequence */
    0,                      /* tp_as_mapping */
    0,                      /* tp_hash */
    0,                      /* tp_call */
    0,                      /* tp_str */
    0,                      /* tp_getattro */
    0,                      /* tp_setattro */
    0,                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,         /* tp_doc */
    0,                      /* tp_traverse */
    0,                      /* tp_clear */
    0,                      /* tp_richcompare */
    0,                      /* tp_weaklistoffset */
    0,                      /* tp_iter */
    0,                      /* tp_iternext */
    0,                      /* tp_methods */
    0,                      /* tp_members */
    0,                      /* tp_getset */
    &PyBaseObject_Type,     /* tp_base */
    0,                      /* tp_dict */
    0,                      /* tp_descr_get */
    0,                      /* tp_descr_set */
    0,                      /* tp_dictoffset */
    0,                      /* tp_init */
    0,                      /* tp_alloc */
    basestring_new,         /* tp_new */
    0,                      /* tp_free */
};
3447
/* Docstring for the str type (used as tp_doc of PyString_Type). */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  It derives from PyBaseString_Type, is a
   variable-size object with one char per item, and wires in the number,
   sequence, mapping and buffer tables together with string_methods.
   The unlabeled leading fields follow the standard PyTypeObject layout. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                      /* ob_size */
    "str",                  /* tp_name */
    sizeof(PyStringObject), /* tp_basicsize */
    sizeof(char),           /* tp_itemsize */
    string_dealloc,         /* tp_dealloc */
    (printfunc)string_print, /* tp_print */
    0,                      /* tp_getattr */
    0,                      /* tp_setattr */
    0,                      /* tp_compare */
    string_repr,            /* tp_repr */
    &string_as_number,      /* tp_as_number */
    &string_as_sequence,    /* tp_as_sequence */
    &string_as_mapping,     /* tp_as_mapping */
    (hashfunc)string_hash,  /* tp_hash */
    0,                      /* tp_call */
    string_str,             /* tp_str */
    PyObject_GenericGetAttr, /* tp_getattro */
    0,                      /* tp_setattro */
    &string_as_buffer,      /* tp_as_buffer */
    /* NOTE(review): Py_TPFLAGS_CHECKTYPES is presumably what lets
       string_mod() receive a non-string left operand -- confirm. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE, /* tp_flags */
    string_doc,             /* tp_doc */
    0,                      /* tp_traverse */
    0,                      /* tp_clear */
    (richcmpfunc)string_richcompare, /* tp_richcompare */
    0,                      /* tp_weaklistoffset */
    0,                      /* tp_iter */
    0,                      /* tp_iternext */
    string_methods,         /* tp_methods */
    0,                      /* tp_members */
    0,                      /* tp_getset */
    &PyBaseString_Type,     /* tp_base */
    0,                      /* tp_dict */
    0,                      /* tp_descr_get */
    0,                      /* tp_descr_set */
    0,                      /* tp_dictoffset */
    0,                      /* tp_init */
    0,                      /* tp_alloc */
    string_new,             /* tp_new */
    PyObject_Del,           /* tp_free */
};
3497
3498void
Fred Drakeba096332000-07-09 07:04:36 +00003499PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003500{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003501 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003502 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003503 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003504 if (w == NULL || !PyString_Check(*pv)) {
3505 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003506 *pv = NULL;
3507 return;
3508 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003509 v = string_concat((PyStringObject *) *pv, w);
3510 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003511 *pv = v;
3512}
3513
Guido van Rossum013142a1994-08-30 08:19:36 +00003514void
Fred Drakeba096332000-07-09 07:04:36 +00003515PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003516{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003517 PyString_Concat(pv, w);
3518 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003519}
3520
3521
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003522/* The following function breaks the notion that strings are immutable:
3523 it changes the size of a string. We get away with this only if there
3524 is only one module referencing the object. You can also think of it
3525 as creating a new string object and destroying the old one, only
3526 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003527 already be known to some other part of the code...
3528 Note that if there's not enough memory to resize the string, the original
3529 string object at *pv is deallocated, *pv is set to NULL, an "out of
3530 memory" exception is set, and -1 is returned. Else (on success) 0 is
3531 returned, and the value in *pv may or may not be the same as on input.
3532 As always, an extra byte is allocated for a trailing \0 byte (newsize
3533 does *not* include that), and a trailing \0 byte is stored.
3534*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003535
3536int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003537_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003538{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003539 register PyObject *v;
3540 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003541 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003542 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3543 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003544 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003545 Py_DECREF(v);
3546 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003547 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003548 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003549 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003550 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003551 _Py_ForgetReference(v);
3552 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003553 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003554 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003555 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003556 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003557 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003558 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003559 _Py_NewReference(*pv);
3560 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003561 sv->ob_size = newsize;
3562 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003563 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003564 return 0;
3565}
Guido van Rossume5372401993-03-16 12:15:04 +00003566
3567/* Helpers for formatstring */
3568
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003569static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00003570getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003571{
Thomas Wouters977485d2006-02-16 15:59:12 +00003572 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003573 if (argidx < arglen) {
3574 (*p_argidx)++;
3575 if (arglen < 0)
3576 return args;
3577 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003578 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003579 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003580 PyErr_SetString(PyExc_TypeError,
3581 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003582 return NULL;
3583}
3584
/* Format codes: one bit per flag character that may follow '%'. */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
3597
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003598static int
Fred Drakeba096332000-07-09 07:04:36 +00003599formatfloat(char *buf, size_t buflen, int flags,
3600 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003601{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003602 /* fmt = '%#.' + `prec` + `type`
3603 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003604 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003605 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003606 x = PyFloat_AsDouble(v);
3607 if (x == -1.0 && PyErr_Occurred()) {
3608 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003609 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003610 }
Guido van Rossume5372401993-03-16 12:15:04 +00003611 if (prec < 0)
3612 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003613 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3614 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003615 /* Worst case length calc to ensure no buffer overrun:
3616
3617 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003618 fmt = %#.<prec>g
3619 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003620 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003621 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003622
3623 'f' formats:
3624 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3625 len = 1 + 50 + 1 + prec = 52 + prec
3626
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003627 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003628 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003629
3630 */
3631 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3632 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003633 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003634 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003635 return -1;
3636 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003637 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3638 (flags&F_ALT) ? "#" : "",
3639 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003640 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003641 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003642}
3643
Tim Peters38fd5b62000-09-21 05:43:11 +00003644/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3645 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3646 * Python's regular ints.
3647 * Return value: a new PyString*, or NULL if error.
3648 * . *pbuf is set to point into it,
3649 * *plen set to the # of chars following that.
3650 * Caller must decref it when done using pbuf.
3651 * The string starting at *pbuf is of the form
3652 * "-"? ("0x" | "0X")? digit+
3653 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003654 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003655 * There will be at least prec digits, zero-filled on the left if
3656 * necessary to get that many.
3657 * val object to be converted
3658 * flags bitmask of format flags; only F_ALT is looked at
3659 * prec minimum number of digits; 0-fill on left if needed
3660 * type a character in [duoxX]; u acts the same as d
3661 *
3662 * CAUTION: o, x and X conversions on regular ints can never
3663 * produce a '-' sign, but can for Python's unbounded ints.
3664 */
3665PyObject*
3666_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3667 char **pbuf, int *plen)
3668{
3669 PyObject *result = NULL;
3670 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003671 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003672 int sign; /* 1 if '-', else 0 */
3673 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003674 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003675 int numdigits; /* len == numnondigits + numdigits */
3676 int numnondigits = 0;
3677
3678 switch (type) {
3679 case 'd':
3680 case 'u':
3681 result = val->ob_type->tp_str(val);
3682 break;
3683 case 'o':
3684 result = val->ob_type->tp_as_number->nb_oct(val);
3685 break;
3686 case 'x':
3687 case 'X':
3688 numnondigits = 2;
3689 result = val->ob_type->tp_as_number->nb_hex(val);
3690 break;
3691 default:
3692 assert(!"'type' not in [duoxX]");
3693 }
3694 if (!result)
3695 return NULL;
3696
3697 /* To modify the string in-place, there can only be one reference. */
3698 if (result->ob_refcnt != 1) {
3699 PyErr_BadInternalCall();
3700 return NULL;
3701 }
3702 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00003703 llen = PyString_Size(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003704 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00003705 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
3706 return NULL;
3707 }
3708 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003709 if (buf[len-1] == 'L') {
3710 --len;
3711 buf[len] = '\0';
3712 }
3713 sign = buf[0] == '-';
3714 numnondigits += sign;
3715 numdigits = len - numnondigits;
3716 assert(numdigits > 0);
3717
Tim Petersfff53252001-04-12 18:38:48 +00003718 /* Get rid of base marker unless F_ALT */
3719 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003720 /* Need to skip 0x, 0X or 0. */
3721 int skipped = 0;
3722 switch (type) {
3723 case 'o':
3724 assert(buf[sign] == '0');
3725 /* If 0 is only digit, leave it alone. */
3726 if (numdigits > 1) {
3727 skipped = 1;
3728 --numdigits;
3729 }
3730 break;
3731 case 'x':
3732 case 'X':
3733 assert(buf[sign] == '0');
3734 assert(buf[sign + 1] == 'x');
3735 skipped = 2;
3736 numnondigits -= 2;
3737 break;
3738 }
3739 if (skipped) {
3740 buf += skipped;
3741 len -= skipped;
3742 if (sign)
3743 buf[0] = '-';
3744 }
3745 assert(len == numnondigits + numdigits);
3746 assert(numdigits > 0);
3747 }
3748
3749 /* Fill with leading zeroes to meet minimum width. */
3750 if (prec > numdigits) {
3751 PyObject *r1 = PyString_FromStringAndSize(NULL,
3752 numnondigits + prec);
3753 char *b1;
3754 if (!r1) {
3755 Py_DECREF(result);
3756 return NULL;
3757 }
3758 b1 = PyString_AS_STRING(r1);
3759 for (i = 0; i < numnondigits; ++i)
3760 *b1++ = *buf++;
3761 for (i = 0; i < prec - numdigits; i++)
3762 *b1++ = '0';
3763 for (i = 0; i < numdigits; i++)
3764 *b1++ = *buf++;
3765 *b1 = '\0';
3766 Py_DECREF(result);
3767 result = r1;
3768 buf = PyString_AS_STRING(result);
3769 len = numnondigits + prec;
3770 }
3771
3772 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003773 if (type == 'X') {
3774 /* Need to convert all lower case letters to upper case.
3775 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003776 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003777 if (buf[i] >= 'a' && buf[i] <= 'x')
3778 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003779 }
3780 *pbuf = buf;
3781 *plen = len;
3782 return result;
3783}
3784
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003785static int
Fred Drakeba096332000-07-09 07:04:36 +00003786formatint(char *buf, size_t buflen, int flags,
3787 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003788{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003789 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003790 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3791 + 1 + 1 = 24 */
3792 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003793 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003794 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003795
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003796 x = PyInt_AsLong(v);
3797 if (x == -1 && PyErr_Occurred()) {
3798 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003799 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003800 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003801 if (x < 0 && type == 'u') {
3802 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003803 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003804 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3805 sign = "-";
3806 else
3807 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003808 if (prec < 0)
3809 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003810
3811 if ((flags & F_ALT) &&
3812 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003813 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003814 * of issues that cause pain:
3815 * - when 0 is being converted, the C standard leaves off
3816 * the '0x' or '0X', which is inconsistent with other
3817 * %#x/%#X conversions and inconsistent with Python's
3818 * hex() function
3819 * - there are platforms that violate the standard and
3820 * convert 0 with the '0x' or '0X'
3821 * (Metrowerks, Compaq Tru64)
3822 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003823 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003824 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003825 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003826 * We can achieve the desired consistency by inserting our
3827 * own '0x' or '0X' prefix, and substituting %x/%X in place
3828 * of %#x/%#X.
3829 *
3830 * Note that this is the same approach as used in
3831 * formatint() in unicodeobject.c
3832 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003833 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3834 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003835 }
3836 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003837 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3838 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003839 prec, type);
3840 }
3841
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003842 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3843 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003844 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003845 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003846 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003847 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003848 return -1;
3849 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003850 if (sign[0])
3851 PyOS_snprintf(buf, buflen, fmt, -x);
3852 else
3853 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003854 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003855}
3856
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003857static int
Fred Drakeba096332000-07-09 07:04:36 +00003858formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003859{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003860 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003861 if (PyString_Check(v)) {
3862 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003863 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003864 }
3865 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003866 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003867 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003868 }
3869 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003870 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003871}
3872
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast cannot combine
   with neighbouring operators at the point of use.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003882
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003883PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003884PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003885{
3886 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003887 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003888 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003889 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003890 PyObject *result, *orig_args;
3891#ifdef Py_USING_UNICODE
3892 PyObject *v, *w;
3893#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003894 PyObject *dict = NULL;
3895 if (format == NULL || !PyString_Check(format) || args == NULL) {
3896 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003897 return NULL;
3898 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003899 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003900 fmt = PyString_AS_STRING(format);
3901 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003902 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003903 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003904 if (result == NULL)
3905 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003906 res = PyString_AsString(result);
3907 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003908 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003909 argidx = 0;
3910 }
3911 else {
3912 arglen = -1;
3913 argidx = -2;
3914 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003915 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3916 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003917 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003918 while (--fmtcnt >= 0) {
3919 if (*fmt != '%') {
3920 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003921 rescnt = fmtcnt + 100;
3922 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003923 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003924 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003925 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003926 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003927 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003928 }
3929 *res++ = *fmt++;
3930 }
3931 else {
3932 /* Got a format specifier */
3933 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003934 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003935 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003936 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003937 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003938 PyObject *v = NULL;
3939 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003940 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003941 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003942 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003943 char formatbuf[FORMATBUFLEN];
3944 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003945#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003946 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003947 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003948#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003949
Guido van Rossumda9c2711996-12-05 21:58:58 +00003950 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003951 if (*fmt == '(') {
3952 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003953 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003954 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003955 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003956
3957 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003958 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003959 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003960 goto error;
3961 }
3962 ++fmt;
3963 --fmtcnt;
3964 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003965 /* Skip over balanced parentheses */
3966 while (pcount > 0 && --fmtcnt >= 0) {
3967 if (*fmt == ')')
3968 --pcount;
3969 else if (*fmt == '(')
3970 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003971 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003972 }
3973 keylen = fmt - keystart - 1;
3974 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003975 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003976 "incomplete format key");
3977 goto error;
3978 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003979 key = PyString_FromStringAndSize(keystart,
3980 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003981 if (key == NULL)
3982 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003983 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003984 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003985 args_owned = 0;
3986 }
3987 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003988 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003989 if (args == NULL) {
3990 goto error;
3991 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003992 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003993 arglen = -1;
3994 argidx = -2;
3995 }
Guido van Rossume5372401993-03-16 12:15:04 +00003996 while (--fmtcnt >= 0) {
3997 switch (c = *fmt++) {
3998 case '-': flags |= F_LJUST; continue;
3999 case '+': flags |= F_SIGN; continue;
4000 case ' ': flags |= F_BLANK; continue;
4001 case '#': flags |= F_ALT; continue;
4002 case '0': flags |= F_ZERO; continue;
4003 }
4004 break;
4005 }
4006 if (c == '*') {
4007 v = getnextarg(args, arglen, &argidx);
4008 if (v == NULL)
4009 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004010 if (!PyInt_Check(v)) {
4011 PyErr_SetString(PyExc_TypeError,
4012 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004013 goto error;
4014 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004015 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004016 if (width < 0) {
4017 flags |= F_LJUST;
4018 width = -width;
4019 }
Guido van Rossume5372401993-03-16 12:15:04 +00004020 if (--fmtcnt >= 0)
4021 c = *fmt++;
4022 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004023 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004024 width = c - '0';
4025 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004026 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004027 if (!isdigit(c))
4028 break;
4029 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004030 PyErr_SetString(
4031 PyExc_ValueError,
4032 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004033 goto error;
4034 }
4035 width = width*10 + (c - '0');
4036 }
4037 }
4038 if (c == '.') {
4039 prec = 0;
4040 if (--fmtcnt >= 0)
4041 c = *fmt++;
4042 if (c == '*') {
4043 v = getnextarg(args, arglen, &argidx);
4044 if (v == NULL)
4045 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004046 if (!PyInt_Check(v)) {
4047 PyErr_SetString(
4048 PyExc_TypeError,
4049 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004050 goto error;
4051 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004052 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004053 if (prec < 0)
4054 prec = 0;
4055 if (--fmtcnt >= 0)
4056 c = *fmt++;
4057 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004058 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004059 prec = c - '0';
4060 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004061 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004062 if (!isdigit(c))
4063 break;
4064 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004065 PyErr_SetString(
4066 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004067 "prec too big");
4068 goto error;
4069 }
4070 prec = prec*10 + (c - '0');
4071 }
4072 }
4073 } /* prec */
4074 if (fmtcnt >= 0) {
4075 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004076 if (--fmtcnt >= 0)
4077 c = *fmt++;
4078 }
4079 }
4080 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004081 PyErr_SetString(PyExc_ValueError,
4082 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004083 goto error;
4084 }
4085 if (c != '%') {
4086 v = getnextarg(args, arglen, &argidx);
4087 if (v == NULL)
4088 goto error;
4089 }
4090 sign = 0;
4091 fill = ' ';
4092 switch (c) {
4093 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004094 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004095 len = 1;
4096 break;
4097 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004098#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004099 if (PyUnicode_Check(v)) {
4100 fmt = fmt_start;
4101 argidx = argidx_start;
4102 goto unicode;
4103 }
Georg Brandld45014b2005-10-01 17:06:00 +00004104#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004105 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004106#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004107 if (temp != NULL && PyUnicode_Check(temp)) {
4108 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004109 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004110 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004111 goto unicode;
4112 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004113#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004114 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004115 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004116 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004117 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004118 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004119 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004120 if (!PyString_Check(temp)) {
4121 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004122 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004123 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004124 goto error;
4125 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004126 pbuf = PyString_AS_STRING(temp);
4127 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004128 if (prec >= 0 && len > prec)
4129 len = prec;
4130 break;
4131 case 'i':
4132 case 'd':
4133 case 'u':
4134 case 'o':
4135 case 'x':
4136 case 'X':
4137 if (c == 'i')
4138 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004139 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004140 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004141 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004142 prec, c, &pbuf, &ilen);
4143 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004144 if (!temp)
4145 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004146 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004147 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004148 else {
4149 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004150 len = formatint(pbuf,
4151 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004152 flags, prec, c, v);
4153 if (len < 0)
4154 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004155 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004156 }
4157 if (flags & F_ZERO)
4158 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004159 break;
4160 case 'e':
4161 case 'E':
4162 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004163 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004164 case 'g':
4165 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004166 if (c == 'F')
4167 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004168 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004169 len = formatfloat(pbuf, sizeof(formatbuf),
4170 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004171 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004172 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004173 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004174 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004175 fill = '0';
4176 break;
4177 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004178#ifdef Py_USING_UNICODE
4179 if (PyUnicode_Check(v)) {
4180 fmt = fmt_start;
4181 argidx = argidx_start;
4182 goto unicode;
4183 }
4184#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004185 pbuf = formatbuf;
4186 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004187 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004188 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004189 break;
4190 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004191 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004192 "unsupported format character '%c' (0x%x) "
4193 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004194 c, c,
4195 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004196 goto error;
4197 }
4198 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004199 if (*pbuf == '-' || *pbuf == '+') {
4200 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004201 len--;
4202 }
4203 else if (flags & F_SIGN)
4204 sign = '+';
4205 else if (flags & F_BLANK)
4206 sign = ' ';
4207 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004208 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004209 }
4210 if (width < len)
4211 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004212 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004213 reslen -= rescnt;
4214 rescnt = width + fmtcnt + 100;
4215 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004216 if (reslen < 0) {
4217 Py_DECREF(result);
4218 return PyErr_NoMemory();
4219 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004220 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004221 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004222 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004223 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004224 }
4225 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004226 if (fill != ' ')
4227 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004228 rescnt--;
4229 if (width > len)
4230 width--;
4231 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004232 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4233 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004234 assert(pbuf[1] == c);
4235 if (fill != ' ') {
4236 *res++ = *pbuf++;
4237 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004238 }
Tim Petersfff53252001-04-12 18:38:48 +00004239 rescnt -= 2;
4240 width -= 2;
4241 if (width < 0)
4242 width = 0;
4243 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004244 }
4245 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004246 do {
4247 --rescnt;
4248 *res++ = fill;
4249 } while (--width > len);
4250 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004251 if (fill == ' ') {
4252 if (sign)
4253 *res++ = sign;
4254 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004255 (c == 'x' || c == 'X')) {
4256 assert(pbuf[0] == '0');
4257 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004258 *res++ = *pbuf++;
4259 *res++ = *pbuf++;
4260 }
4261 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004262 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004263 res += len;
4264 rescnt -= len;
4265 while (--width >= len) {
4266 --rescnt;
4267 *res++ = ' ';
4268 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004269 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004270 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004271 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004272 goto error;
4273 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004274 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004275 } /* '%' */
4276 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004277 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004278 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004279 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004280 goto error;
4281 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004282 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004283 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004284 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004285 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004286 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004287
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004288#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004289 unicode:
4290 if (args_owned) {
4291 Py_DECREF(args);
4292 args_owned = 0;
4293 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004294 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004295 if (PyTuple_Check(orig_args) && argidx > 0) {
4296 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004297 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004298 v = PyTuple_New(n);
4299 if (v == NULL)
4300 goto error;
4301 while (--n >= 0) {
4302 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4303 Py_INCREF(w);
4304 PyTuple_SET_ITEM(v, n, w);
4305 }
4306 args = v;
4307 } else {
4308 Py_INCREF(orig_args);
4309 args = orig_args;
4310 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004311 args_owned = 1;
4312 /* Take what we have of the result and let the Unicode formatting
4313 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004314 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004315 if (_PyString_Resize(&result, rescnt))
4316 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004317 fmtcnt = PyString_GET_SIZE(format) - \
4318 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004319 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4320 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004321 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004322 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004323 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004324 if (v == NULL)
4325 goto error;
4326 /* Paste what we have (result) to what the Unicode formatting
4327 function returned (v) and return the result (or error) */
4328 w = PyUnicode_Concat(result, v);
4329 Py_DECREF(result);
4330 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004331 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004332 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004333#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004334
Guido van Rossume5372401993-03-16 12:15:04 +00004335 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004336 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004337 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004338 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004339 }
Guido van Rossume5372401993-03-16 12:15:04 +00004340 return NULL;
4341}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004342
Guido van Rossum2a61e741997-01-18 07:55:05 +00004343void
Fred Drakeba096332000-07-09 07:04:36 +00004344PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004345{
4346 register PyStringObject *s = (PyStringObject *)(*p);
4347 PyObject *t;
4348 if (s == NULL || !PyString_Check(s))
4349 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004350 /* If it's a string subclass, we don't really know what putting
4351 it in the interned dict might do. */
4352 if (!PyString_CheckExact(s))
4353 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004354 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004355 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004356 if (interned == NULL) {
4357 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004358 if (interned == NULL) {
4359 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004360 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004361 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004362 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004363 t = PyDict_GetItem(interned, (PyObject *)s);
4364 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004365 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004366 Py_DECREF(*p);
4367 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004368 return;
4369 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004370
Armin Rigo79f7ad22004-08-07 19:27:39 +00004371 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004372 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004373 return;
4374 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004375 /* The two references in interned are not counted by refcnt.
4376 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004377 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004378 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004379}
4380
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004381void
4382PyString_InternImmortal(PyObject **p)
4383{
4384 PyString_InternInPlace(p);
4385 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4386 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4387 Py_INCREF(*p);
4388 }
4389}
4390
Guido van Rossum2a61e741997-01-18 07:55:05 +00004391
4392PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004393PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004394{
4395 PyObject *s = PyString_FromString(cp);
4396 if (s == NULL)
4397 return NULL;
4398 PyString_InternInPlace(&s);
4399 return s;
4400}
4401
Guido van Rossum8cf04761997-08-02 02:57:45 +00004402void
Fred Drakeba096332000-07-09 07:04:36 +00004403PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004404{
4405 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004406 for (i = 0; i < UCHAR_MAX + 1; i++) {
4407 Py_XDECREF(characters[i]);
4408 characters[i] = NULL;
4409 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004410 Py_XDECREF(nullstring);
4411 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004412}
Barry Warsawa903ad982001-02-23 16:40:48 +00004413
Barry Warsawa903ad982001-02-23 16:40:48 +00004414void _Py_ReleaseInternedStrings(void)
4415{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004416 PyObject *keys;
4417 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004418 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004419
4420 if (interned == NULL || !PyDict_Check(interned))
4421 return;
4422 keys = PyDict_Keys(interned);
4423 if (keys == NULL || !PyList_Check(keys)) {
4424 PyErr_Clear();
4425 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004426 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004427
4428 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4429 detector, interned strings are not forcibly deallocated; rather, we
4430 give them their stolen references back, and then clear and DECREF
4431 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004432
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004433 fprintf(stderr, "releasing interned strings\n");
4434 n = PyList_GET_SIZE(keys);
4435 for (i = 0; i < n; i++) {
4436 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4437 switch (s->ob_sstate) {
4438 case SSTATE_NOT_INTERNED:
4439 /* XXX Shouldn't happen */
4440 break;
4441 case SSTATE_INTERNED_IMMORTAL:
4442 s->ob_refcnt += 1;
4443 break;
4444 case SSTATE_INTERNED_MORTAL:
4445 s->ob_refcnt += 2;
4446 break;
4447 default:
4448 Py_FatalError("Inconsistent interned string state.");
4449 }
4450 s->ob_sstate = SSTATE_NOT_INTERNED;
4451 }
4452 Py_DECREF(keys);
4453 PyDict_Clear(interned);
4454 Py_DECREF(interned);
4455 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004456}