blob: b90221a6b87ce2e91434837f0b29d0d1a6cee1f2 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000157 int n = 0;
158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000184
Barry Warsawdadace02001-08-24 18:32:06 +0000185 switch (*f) {
186 case 'c':
187 (void)va_arg(count, int);
188 /* fall through... */
189 case '%':
190 n++;
191 break;
192 case 'd': case 'i': case 'x':
193 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000194 /* 20 bytes is enough to hold a 64-bit
195 integer. Decimal takes the most space.
196 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000197 n += 20;
198 break;
199 case 's':
200 s = va_arg(count, char*);
201 n += strlen(s);
202 break;
203 case 'p':
204 (void) va_arg(count, int);
205 /* maximum 64-bit pointer representation:
206 * 0xffffffffffffffff
207 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000208 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000209 */
210 n += 19;
211 break;
212 default:
213 /* if we stumble upon an unknown
214 formatting code, copy the rest of
215 the format string to the output
216 string. (we cannot just skip the
217 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000218 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 n += strlen(p);
220 goto expand;
221 }
222 } else
223 n++;
224 }
225 expand:
226 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000227 /* Since we've analyzed how much space we need for the worst case,
228 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000229 string = PyString_FromStringAndSize(NULL, n);
230 if (!string)
231 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000232
Barry Warsawdadace02001-08-24 18:32:06 +0000233 s = PyString_AsString(string);
234
235 for (f = format; *f; f++) {
236 if (*f == '%') {
237 const char* p = f++;
238 int i, longflag = 0;
239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
242 while (isdigit(Py_CHARMASK(*f)))
243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 }
250 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
251 f++;
252 /* handle the long flag, but only for %ld. others
253 can be added when necessary. */
254 if (*f == 'l' && *(f+1) == 'd') {
255 longflag = 1;
256 ++f;
257 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000258
Barry Warsawdadace02001-08-24 18:32:06 +0000259 switch (*f) {
260 case 'c':
261 *s++ = va_arg(vargs, int);
262 break;
263 case 'd':
264 if (longflag)
265 sprintf(s, "%ld", va_arg(vargs, long));
266 else
267 sprintf(s, "%d", va_arg(vargs, int));
268 s += strlen(s);
269 break;
270 case 'i':
271 sprintf(s, "%i", va_arg(vargs, int));
272 s += strlen(s);
273 break;
274 case 'x':
275 sprintf(s, "%x", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 's':
279 p = va_arg(vargs, char*);
280 i = strlen(p);
281 if (n > 0 && i > n)
282 i = n;
283 memcpy(s, p, i);
284 s += i;
285 break;
286 case 'p':
287 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000288 /* %p is ill-defined: ensure leading 0x. */
289 if (s[1] == 'X')
290 s[1] = 'x';
291 else if (s[1] != 'x') {
292 memmove(s+2, s, strlen(s)+1);
293 s[0] = '0';
294 s[1] = 'x';
295 }
Barry Warsawdadace02001-08-24 18:32:06 +0000296 s += strlen(s);
297 break;
298 case '%':
299 *s++ = '%';
300 break;
301 default:
302 strcpy(s, p);
303 s += strlen(s);
304 goto end;
305 }
306 } else
307 *s++ = *f;
308 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000309
Barry Warsawdadace02001-08-24 18:32:06 +0000310 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000311 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000312 return string;
313}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000316PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000317{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000318 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319 va_list vargs;
320
321#ifdef HAVE_STDARG_PROTOTYPES
322 va_start(vargs, format);
323#else
324 va_start(vargs);
325#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000326 ret = PyString_FromFormatV(format, vargs);
327 va_end(vargs);
328 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000329}
330
331
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000332PyObject *PyString_Decode(const char *s,
333 int size,
334 const char *encoding,
335 const char *errors)
336{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000337 PyObject *v, *str;
338
339 str = PyString_FromStringAndSize(s, size);
340 if (str == NULL)
341 return NULL;
342 v = PyString_AsDecodedString(str, encoding, errors);
343 Py_DECREF(str);
344 return v;
345}
346
347PyObject *PyString_AsDecodedObject(PyObject *str,
348 const char *encoding,
349 const char *errors)
350{
351 PyObject *v;
352
353 if (!PyString_Check(str)) {
354 PyErr_BadArgument();
355 goto onError;
356 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000358 if (encoding == NULL) {
359#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000361#else
362 PyErr_SetString(PyExc_ValueError, "no encoding specified");
363 goto onError;
364#endif
365 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000366
367 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000368 v = PyCodec_Decode(str, encoding, errors);
369 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000370 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000371
372 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000373
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000374 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000375 return NULL;
376}
377
378PyObject *PyString_AsDecodedString(PyObject *str,
379 const char *encoding,
380 const char *errors)
381{
382 PyObject *v;
383
384 v = PyString_AsDecodedObject(str, encoding, errors);
385 if (v == NULL)
386 goto onError;
387
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000388#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389 /* Convert Unicode to a string using the default encoding */
390 if (PyUnicode_Check(v)) {
391 PyObject *temp = v;
392 v = PyUnicode_AsEncodedString(v, NULL, NULL);
393 Py_DECREF(temp);
394 if (v == NULL)
395 goto onError;
396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000397#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 if (!PyString_Check(v)) {
399 PyErr_Format(PyExc_TypeError,
400 "decoder did not return a string object (type=%.400s)",
401 v->ob_type->tp_name);
402 Py_DECREF(v);
403 goto onError;
404 }
405
406 return v;
407
408 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 return NULL;
410}
411
412PyObject *PyString_Encode(const char *s,
413 int size,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000418
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000419 str = PyString_FromStringAndSize(s, size);
420 if (str == NULL)
421 return NULL;
422 v = PyString_AsEncodedString(str, encoding, errors);
423 Py_DECREF(str);
424 return v;
425}
426
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000428 const char *encoding,
429 const char *errors)
430{
431 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000432
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 if (!PyString_Check(str)) {
434 PyErr_BadArgument();
435 goto onError;
436 }
437
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000438 if (encoding == NULL) {
439#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000440 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000441#else
442 PyErr_SetString(PyExc_ValueError, "no encoding specified");
443 goto onError;
444#endif
445 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446
447 /* Encode via the codec registry */
448 v = PyCodec_Encode(str, encoding, errors);
449 if (v == NULL)
450 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451
452 return v;
453
454 onError:
455 return NULL;
456}
457
458PyObject *PyString_AsEncodedString(PyObject *str,
459 const char *encoding,
460 const char *errors)
461{
462 PyObject *v;
463
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000464 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000465 if (v == NULL)
466 goto onError;
467
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000468#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469 /* Convert Unicode to a string using the default encoding */
470 if (PyUnicode_Check(v)) {
471 PyObject *temp = v;
472 v = PyUnicode_AsEncodedString(v, NULL, NULL);
473 Py_DECREF(temp);
474 if (v == NULL)
475 goto onError;
476 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000477#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 if (!PyString_Check(v)) {
479 PyErr_Format(PyExc_TypeError,
480 "encoder did not return a string object (type=%.400s)",
481 v->ob_type->tp_name);
482 Py_DECREF(v);
483 goto onError;
484 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000485
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000486 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000487
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000488 onError:
489 return NULL;
490}
491
Guido van Rossum234f9421993-06-17 12:35:49 +0000492static void
Fred Drakeba096332000-07-09 07:04:36 +0000493string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000494{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000495 switch (PyString_CHECK_INTERNED(op)) {
496 case SSTATE_NOT_INTERNED:
497 break;
498
499 case SSTATE_INTERNED_MORTAL:
500 /* revive dead object temporarily for DelItem */
501 op->ob_refcnt = 3;
502 if (PyDict_DelItem(interned, op) != 0)
503 Py_FatalError(
504 "deletion of interned string failed");
505 break;
506
507 case SSTATE_INTERNED_IMMORTAL:
508 Py_FatalError("Immortal interned string died.");
509
510 default:
511 Py_FatalError("Inconsistent interned string state.");
512 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000513 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000514}
515
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000516/* Unescape a backslash-escaped string. If unicode is non-zero,
517 the string is a u-literal. If recode_encoding is non-zero,
518 the string is UTF-8 encoded and should be re-encoded in the
519 specified encoding. */
520
521PyObject *PyString_DecodeEscape(const char *s,
522 int len,
523 const char *errors,
524 int unicode,
525 const char *recode_encoding)
526{
527 int c;
528 char *p, *buf;
529 const char *end;
530 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000531 int newlen = recode_encoding ? 4*len:len;
532 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000533 if (v == NULL)
534 return NULL;
535 p = buf = PyString_AsString(v);
536 end = s + len;
537 while (s < end) {
538 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000539 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540#ifdef Py_USING_UNICODE
541 if (recode_encoding && (*s & 0x80)) {
542 PyObject *u, *w;
543 char *r;
544 const char* t;
545 int rn;
546 t = s;
547 /* Decode non-ASCII bytes as UTF-8. */
548 while (t < end && (*t & 0x80)) t++;
549 u = PyUnicode_DecodeUTF8(s, t - s, errors);
550 if(!u) goto failed;
551
552 /* Recode them in target encoding. */
553 w = PyUnicode_AsEncodedString(
554 u, recode_encoding, errors);
555 Py_DECREF(u);
556 if (!w) goto failed;
557
558 /* Append bytes to output buffer. */
559 r = PyString_AsString(w);
560 rn = PyString_Size(w);
561 memcpy(p, r, rn);
562 p += rn;
563 Py_DECREF(w);
564 s = t;
565 } else {
566 *p++ = *s++;
567 }
568#else
569 *p++ = *s++;
570#endif
571 continue;
572 }
573 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000574 if (s==end) {
575 PyErr_SetString(PyExc_ValueError,
576 "Trailing \\ in string");
577 goto failed;
578 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000579 switch (*s++) {
580 /* XXX This assumes ASCII! */
581 case '\n': break;
582 case '\\': *p++ = '\\'; break;
583 case '\'': *p++ = '\''; break;
584 case '\"': *p++ = '\"'; break;
585 case 'b': *p++ = '\b'; break;
586 case 'f': *p++ = '\014'; break; /* FF */
587 case 't': *p++ = '\t'; break;
588 case 'n': *p++ = '\n'; break;
589 case 'r': *p++ = '\r'; break;
590 case 'v': *p++ = '\013'; break; /* VT */
591 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
592 case '0': case '1': case '2': case '3':
593 case '4': case '5': case '6': case '7':
594 c = s[-1] - '0';
595 if ('0' <= *s && *s <= '7') {
596 c = (c<<3) + *s++ - '0';
597 if ('0' <= *s && *s <= '7')
598 c = (c<<3) + *s++ - '0';
599 }
600 *p++ = c;
601 break;
602 case 'x':
603 if (isxdigit(Py_CHARMASK(s[0]))
604 && isxdigit(Py_CHARMASK(s[1]))) {
605 unsigned int x = 0;
606 c = Py_CHARMASK(*s);
607 s++;
608 if (isdigit(c))
609 x = c - '0';
610 else if (islower(c))
611 x = 10 + c - 'a';
612 else
613 x = 10 + c - 'A';
614 x = x << 4;
615 c = Py_CHARMASK(*s);
616 s++;
617 if (isdigit(c))
618 x += c - '0';
619 else if (islower(c))
620 x += 10 + c - 'a';
621 else
622 x += 10 + c - 'A';
623 *p++ = x;
624 break;
625 }
626 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000627 PyErr_SetString(PyExc_ValueError,
628 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000629 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000630 }
631 if (strcmp(errors, "replace") == 0) {
632 *p++ = '?';
633 } else if (strcmp(errors, "ignore") == 0)
634 /* do nothing */;
635 else {
636 PyErr_Format(PyExc_ValueError,
637 "decoding error; "
638 "unknown error handling code: %.400s",
639 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000640 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000641 }
642#ifndef Py_USING_UNICODE
643 case 'u':
644 case 'U':
645 case 'N':
646 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000647 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 "Unicode escapes not legal "
649 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000650 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000651 }
652#endif
653 default:
654 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000655 s--;
656 goto non_esc; /* an arbitry number of unescaped
657 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000658 }
659 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000660 if (p-buf < newlen)
661 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 return v;
663 failed:
664 Py_DECREF(v);
665 return NULL;
666}
667
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000668static int
669string_getsize(register PyObject *op)
670{
671 char *s;
672 int len;
673 if (PyString_AsStringAndSize(op, &s, &len))
674 return -1;
675 return len;
676}
677
678static /*const*/ char *
679string_getbuffer(register PyObject *op)
680{
681 char *s;
682 int len;
683 if (PyString_AsStringAndSize(op, &s, &len))
684 return NULL;
685 return s;
686}
687
Guido van Rossumd7047b31995-01-02 19:07:15 +0000688int
Fred Drakeba096332000-07-09 07:04:36 +0000689PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000690{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000691 if (!PyString_Check(op))
692 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000694}
695
696/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000697PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000699 if (!PyString_Check(op))
700 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702}
703
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704int
705PyString_AsStringAndSize(register PyObject *obj,
706 register char **s,
707 register int *len)
708{
709 if (s == NULL) {
710 PyErr_BadInternalCall();
711 return -1;
712 }
713
714 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000715#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (PyUnicode_Check(obj)) {
717 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
718 if (obj == NULL)
719 return -1;
720 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000721 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000722#endif
723 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 PyErr_Format(PyExc_TypeError,
725 "expected string or Unicode object, "
726 "%.200s found", obj->ob_type->tp_name);
727 return -1;
728 }
729 }
730
731 *s = PyString_AS_STRING(obj);
732 if (len != NULL)
733 *len = PyString_GET_SIZE(obj);
734 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
735 PyErr_SetString(PyExc_TypeError,
736 "expected string without null bytes");
737 return -1;
738 }
739 return 0;
740}
741
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742/* Methods */
743
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000744static int
Fred Drakeba096332000-07-09 07:04:36 +0000745string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746{
747 int i;
748 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000749 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000750
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000751 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000752 if (! PyString_CheckExact(op)) {
753 int ret;
754 /* A str subclass may have its own __str__ method. */
755 op = (PyStringObject *) PyObject_Str((PyObject *)op);
756 if (op == NULL)
757 return -1;
758 ret = string_print(op, fp, flags);
759 Py_DECREF(op);
760 return ret;
761 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000762 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000763#ifdef __VMS
764 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
765#else
766 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
767#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000768 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000770
Thomas Wouters7e474022000-07-16 12:04:32 +0000771 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000772 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000773 if (memchr(op->ob_sval, '\'', op->ob_size) &&
774 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775 quote = '"';
776
777 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000778 for (i = 0; i < op->ob_size; i++) {
779 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000781 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000782 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000783 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000784 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000785 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000786 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000787 fprintf(fp, "\\r");
788 else if (c < ' ' || c >= 0x7f)
789 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000790 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000791 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000794 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000795}
796
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000797PyObject *
798PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000800 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000801 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000802 PyObject *v;
803 if (newsize > INT_MAX) {
804 PyErr_SetString(PyExc_OverflowError,
805 "string is too large to make repr");
806 }
807 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000809 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000810 }
811 else {
812 register int i;
813 register char c;
814 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815 int quote;
816
Thomas Wouters7e474022000-07-16 12:04:32 +0000817 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000818 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000819 if (smartquotes &&
820 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000821 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000822 quote = '"';
823
Tim Peters9161c8b2001-12-03 01:55:38 +0000824 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000825 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000827 /* There's at least enough room for a hex escape
828 and a closing quote. */
829 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000833 else if (c == '\t')
834 *p++ = '\\', *p++ = 't';
835 else if (c == '\n')
836 *p++ = '\\', *p++ = 'n';
837 else if (c == '\r')
838 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 else if (c < ' ' || c >= 0x7f) {
840 /* For performance, we don't want to call
841 PyOS_snprintf here (extra layers of
842 function call). */
843 sprintf(p, "\\x%02x", c & 0xff);
844 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000845 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 else
847 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000849 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000850 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000852 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000853 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000854 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856}
857
Guido van Rossum189f1df2001-05-01 16:51:53 +0000858static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000859string_repr(PyObject *op)
860{
861 return PyString_Repr(op, 1);
862}
863
864static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000865string_str(PyObject *s)
866{
Tim Petersc9933152001-10-16 20:18:24 +0000867 assert(PyString_Check(s));
868 if (PyString_CheckExact(s)) {
869 Py_INCREF(s);
870 return s;
871 }
872 else {
873 /* Subtype -- return genuine string with the same value. */
874 PyStringObject *t = (PyStringObject *) s;
875 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
876 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000877}
878
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879static int
Fred Drakeba096332000-07-09 07:04:36 +0000880string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881{
882 return a->ob_size;
883}
884
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000885static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000886string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887{
888 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000889 register PyStringObject *op;
890 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000891#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000892 if (PyUnicode_Check(bb))
893 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000894#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000895 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000896 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000897 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898 return NULL;
899 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000900#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000902 if ((a->ob_size == 0 || b->ob_size == 0) &&
903 PyString_CheckExact(a) && PyString_CheckExact(b)) {
904 if (a->ob_size == 0) {
905 Py_INCREF(bb);
906 return bb;
907 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 Py_INCREF(a);
909 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000910 }
911 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000912 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000913 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000914 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000915 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000916 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000917 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000918 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000919 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
920 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
921 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923#undef b
924}
925
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000926static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000927string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000928{
929 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000930 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000931 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000933 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934 if (n < 0)
935 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000936 /* watch out for overflows: the size can overflow int,
937 * and the # of bytes needed can overflow size_t
938 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000940 if (n && size / n != a->ob_size) {
941 PyErr_SetString(PyExc_OverflowError,
942 "repeated string is too long");
943 return NULL;
944 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000945 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 Py_INCREF(a);
947 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 }
Tim Peterse7c05322004-06-27 17:24:49 +0000949 nbytes = (size_t)size;
950 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000951 PyErr_SetString(PyExc_OverflowError,
952 "repeated string is too long");
953 return NULL;
954 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000956 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000957 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000959 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000960 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000961 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000962 op->ob_sval[size] = '\0';
963 if (a->ob_size == 1 && n > 0) {
964 memset(op->ob_sval, a->ob_sval[0] , n);
965 return (PyObject *) op;
966 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000967 i = 0;
968 if (i < size) {
969 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
970 i = (int) a->ob_size;
971 }
972 while (i < size) {
973 j = (i <= size-i) ? i : size-i;
974 memcpy(op->ob_sval+i, op->ob_sval, j);
975 i += j;
976 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978}
979
980/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
981
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000983string_slice(register PyStringObject *a, register int i, register int j)
984 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985{
986 if (i < 0)
987 i = 0;
988 if (j < 0)
989 j = 0; /* Avoid signed/unsigned bug in next line */
990 if (j > a->ob_size)
991 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000992 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
993 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 Py_INCREF(a);
995 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 }
997 if (j < i)
998 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001000}
1001
Guido van Rossum9284a572000-03-07 15:53:43 +00001002static int
Fred Drakeba096332000-07-09 07:04:36 +00001003string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001004{
Barry Warsaw817918c2002-08-06 16:58:21 +00001005 const char *lhs, *rhs, *end;
1006 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001007
1008 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001009#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001010 if (PyUnicode_Check(el))
1011 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001012#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001013 if (!PyString_Check(el)) {
1014 PyErr_SetString(PyExc_TypeError,
1015 "'in <string>' requires string as left operand");
1016 return -1;
1017 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001018 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001019 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001020 rhs = PyString_AS_STRING(el);
1021 lhs = PyString_AS_STRING(a);
1022
1023 /* optimize for a single character */
1024 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001025 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001026
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001027 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001028 while (lhs <= end) {
1029 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001030 return 1;
1031 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001032
Guido van Rossum9284a572000-03-07 15:53:43 +00001033 return 0;
1034}
1035
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001036static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001037string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001038{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001040 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001041 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001043 return NULL;
1044 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001045 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001046 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001047 if (v == NULL)
1048 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001049 else {
1050#ifdef COUNT_ALLOCS
1051 one_strings++;
1052#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001053 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001054 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001055 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001056}
1057
Martin v. Löwiscd353062001-05-24 16:56:35 +00001058static PyObject*
1059string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001060{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001061 int c;
1062 int len_a, len_b;
1063 int min_len;
1064 PyObject *result;
1065
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001066 /* Make sure both arguments are strings. */
1067 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001068 result = Py_NotImplemented;
1069 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001070 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001071 if (a == b) {
1072 switch (op) {
1073 case Py_EQ:case Py_LE:case Py_GE:
1074 result = Py_True;
1075 goto out;
1076 case Py_NE:case Py_LT:case Py_GT:
1077 result = Py_False;
1078 goto out;
1079 }
1080 }
1081 if (op == Py_EQ) {
1082 /* Supporting Py_NE here as well does not save
1083 much time, since Py_NE is rarely used. */
1084 if (a->ob_size == b->ob_size
1085 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001086 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001087 a->ob_size) == 0)) {
1088 result = Py_True;
1089 } else {
1090 result = Py_False;
1091 }
1092 goto out;
1093 }
1094 len_a = a->ob_size; len_b = b->ob_size;
1095 min_len = (len_a < len_b) ? len_a : len_b;
1096 if (min_len > 0) {
1097 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1098 if (c==0)
1099 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1100 }else
1101 c = 0;
1102 if (c == 0)
1103 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1104 switch (op) {
1105 case Py_LT: c = c < 0; break;
1106 case Py_LE: c = c <= 0; break;
1107 case Py_EQ: assert(0); break; /* unreachable */
1108 case Py_NE: c = c != 0; break;
1109 case Py_GT: c = c > 0; break;
1110 case Py_GE: c = c >= 0; break;
1111 default:
1112 result = Py_NotImplemented;
1113 goto out;
1114 }
1115 result = c ? Py_True : Py_False;
1116 out:
1117 Py_INCREF(result);
1118 return result;
1119}
1120
1121int
1122_PyString_Eq(PyObject *o1, PyObject *o2)
1123{
1124 PyStringObject *a, *b;
1125 a = (PyStringObject*)o1;
1126 b = (PyStringObject*)o2;
1127 return a->ob_size == b->ob_size
1128 && *a->ob_sval == *b->ob_sval
1129 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001130}
1131
Guido van Rossum9bfef441993-03-29 10:43:31 +00001132static long
Fred Drakeba096332000-07-09 07:04:36 +00001133string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001134{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001135 register int len;
1136 register unsigned char *p;
1137 register long x;
1138
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001139 if (a->ob_shash != -1)
1140 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001141 len = a->ob_size;
1142 p = (unsigned char *) a->ob_sval;
1143 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001144 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001145 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001146 x ^= a->ob_size;
1147 if (x == -1)
1148 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001149 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001150 return x;
1151}
1152
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001153static PyObject*
1154string_subscript(PyStringObject* self, PyObject* item)
1155{
1156 if (PyInt_Check(item)) {
1157 long i = PyInt_AS_LONG(item);
1158 if (i < 0)
1159 i += PyString_GET_SIZE(self);
1160 return string_item(self,i);
1161 }
1162 else if (PyLong_Check(item)) {
1163 long i = PyLong_AsLong(item);
1164 if (i == -1 && PyErr_Occurred())
1165 return NULL;
1166 if (i < 0)
1167 i += PyString_GET_SIZE(self);
1168 return string_item(self,i);
1169 }
1170 else if (PySlice_Check(item)) {
1171 int start, stop, step, slicelength, cur, i;
1172 char* source_buf;
1173 char* result_buf;
1174 PyObject* result;
1175
1176 if (PySlice_GetIndicesEx((PySliceObject*)item,
1177 PyString_GET_SIZE(self),
1178 &start, &stop, &step, &slicelength) < 0) {
1179 return NULL;
1180 }
1181
1182 if (slicelength <= 0) {
1183 return PyString_FromStringAndSize("", 0);
1184 }
1185 else {
1186 source_buf = PyString_AsString((PyObject*)self);
1187 result_buf = PyMem_Malloc(slicelength);
1188
1189 for (cur = start, i = 0; i < slicelength;
1190 cur += step, i++) {
1191 result_buf[i] = source_buf[cur];
1192 }
1193
1194 result = PyString_FromStringAndSize(result_buf,
1195 slicelength);
1196 PyMem_Free(result_buf);
1197 return result;
1198 }
1199 }
1200 else {
1201 PyErr_SetString(PyExc_TypeError,
1202 "string indices must be integers");
1203 return NULL;
1204 }
1205}
1206
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001207static int
Fred Drakeba096332000-07-09 07:04:36 +00001208string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001209{
1210 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001211 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001212 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001213 return -1;
1214 }
1215 *ptr = (void *)self->ob_sval;
1216 return self->ob_size;
1217}
1218
1219static int
Fred Drakeba096332000-07-09 07:04:36 +00001220string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001221{
Guido van Rossum045e6881997-09-08 18:30:11 +00001222 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001223 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001224 return -1;
1225}
1226
1227static int
Fred Drakeba096332000-07-09 07:04:36 +00001228string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001229{
1230 if ( lenp )
1231 *lenp = self->ob_size;
1232 return 1;
1233}
1234
Guido van Rossum1db70701998-10-08 02:18:52 +00001235static int
Fred Drakeba096332000-07-09 07:04:36 +00001236string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001237{
1238 if ( index != 0 ) {
1239 PyErr_SetString(PyExc_SystemError,
1240 "accessing non-existent string segment");
1241 return -1;
1242 }
1243 *ptr = self->ob_sval;
1244 return self->ob_size;
1245}
1246
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001247static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001248 (inquiry)string_length, /*sq_length*/
1249 (binaryfunc)string_concat, /*sq_concat*/
1250 (intargfunc)string_repeat, /*sq_repeat*/
1251 (intargfunc)string_item, /*sq_item*/
1252 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001253 0, /*sq_ass_item*/
1254 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001255 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001256};
1257
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001258static PyMappingMethods string_as_mapping = {
1259 (inquiry)string_length,
1260 (binaryfunc)string_subscript,
1261 0,
1262};
1263
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001264static PyBufferProcs string_as_buffer = {
1265 (getreadbufferproc)string_buffer_getreadbuf,
1266 (getwritebufferproc)string_buffer_getwritebuf,
1267 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001268 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001269};
1270
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001271
1272
/* Strip-direction selectors; they double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, one per selector above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Bare method name for selector i: the format string minus its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001281
/* Helpers for the split functions.  Both build the substring
   data[left:right] and add it to the result list, jumping to the caller's
   `onError` label on failure.  They rely on the caller declaring
   `PyObject *str` and `PyObject *list` and providing an `onError:` label.

   Each is wrapped in do { ... } while (0) so that an invocation followed by
   a semicolon is a single statement, safe as the body of an unbraced
   if/else. */

/* Add data[left:right] at the end of `list`. */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)

/* Add data[left:right] at the front of `list`. */
#define SPLIT_INSERT(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Insert(list, 0, str)) {                      \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305
1306static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001307split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001308{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001309 int i, j;
1310 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001311 PyObject *list = PyList_New(0);
1312
1313 if (list == NULL)
1314 return NULL;
1315
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316 for (i = j = 0; i < len; ) {
1317 while (i < len && isspace(Py_CHARMASK(s[i])))
1318 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001320 while (i < len && !isspace(Py_CHARMASK(s[i])))
1321 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001322 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001323 if (maxsplit-- <= 0)
1324 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001325 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001326 while (i < len && isspace(Py_CHARMASK(s[i])))
1327 i++;
1328 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001329 }
1330 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001331 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001332 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001335 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336 Py_DECREF(list);
1337 return NULL;
1338}
1339
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001340static PyObject *
1341split_char(const char *s, int len, char ch, int maxcount)
1342{
1343 register int i, j;
1344 PyObject *str;
1345 PyObject *list = PyList_New(0);
1346
1347 if (list == NULL)
1348 return NULL;
1349
1350 for (i = j = 0; i < len; ) {
1351 if (s[i] == ch) {
1352 if (maxcount-- <= 0)
1353 break;
1354 SPLIT_APPEND(s, j, i);
1355 i = j = i + 1;
1356 } else
1357 i++;
1358 }
1359 if (j <= len) {
1360 SPLIT_APPEND(s, j, len);
1361 }
1362 return list;
1363
1364 onError:
1365 Py_DECREF(list);
1366 return NULL;
1367}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001369PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370"S.split([sep [,maxsplit]]) -> list of strings\n\
1371\n\
1372Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001373delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001374splits are done. If sep is not specified or is None, any\n\
1375whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376
1377static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001378string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379{
1380 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001381 int maxsplit = -1;
1382 const char *s = PyString_AS_STRING(self), *sub;
1383 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001384
Guido van Rossum4c08d552000-03-10 22:55:18 +00001385 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001386 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001387 if (maxsplit < 0)
1388 maxsplit = INT_MAX;
1389 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001390 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001391 if (PyString_Check(subobj)) {
1392 sub = PyString_AS_STRING(subobj);
1393 n = PyString_GET_SIZE(subobj);
1394 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001395#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001396 else if (PyUnicode_Check(subobj))
1397 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001398#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001399 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1400 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001401
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 if (n == 0) {
1403 PyErr_SetString(PyExc_ValueError, "empty separator");
1404 return NULL;
1405 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001406 else if (n == 1)
1407 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408
1409 list = PyList_New(0);
1410 if (list == NULL)
1411 return NULL;
1412
1413 i = j = 0;
1414 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001415 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001416 if (maxsplit-- <= 0)
1417 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1419 if (item == NULL)
1420 goto fail;
1421 err = PyList_Append(list, item);
1422 Py_DECREF(item);
1423 if (err < 0)
1424 goto fail;
1425 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426 }
1427 else
1428 i++;
1429 }
1430 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1431 if (item == NULL)
1432 goto fail;
1433 err = PyList_Append(list, item);
1434 Py_DECREF(item);
1435 if (err < 0)
1436 goto fail;
1437
1438 return list;
1439
1440 fail:
1441 Py_DECREF(list);
1442 return NULL;
1443}
1444
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001445static PyObject *
1446rsplit_whitespace(const char *s, int len, int maxsplit)
1447{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001448 int i, j;
1449 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001450 PyObject *list = PyList_New(0);
1451
1452 if (list == NULL)
1453 return NULL;
1454
1455 for (i = j = len - 1; i >= 0; ) {
1456 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1457 i--;
1458 j = i;
1459 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1460 i--;
1461 if (j > i) {
1462 if (maxsplit-- <= 0)
1463 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001464 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001465 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1466 i--;
1467 j = i;
1468 }
1469 }
1470 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001471 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001472 }
1473 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001474 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001475 Py_DECREF(list);
1476 return NULL;
1477}
1478
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001479static PyObject *
1480rsplit_char(const char *s, int len, char ch, int maxcount)
1481{
1482 register int i, j;
1483 PyObject *str;
1484 PyObject *list = PyList_New(0);
1485
1486 if (list == NULL)
1487 return NULL;
1488
1489 for (i = j = len - 1; i >= 0; ) {
1490 if (s[i] == ch) {
1491 if (maxcount-- <= 0)
1492 break;
1493 SPLIT_INSERT(s, i + 1, j + 1);
1494 j = i = i - 1;
1495 } else
1496 i--;
1497 }
1498 if (j >= -1) {
1499 SPLIT_INSERT(s, 0, j + 1);
1500 }
1501 return list;
1502
1503 onError:
1504 Py_DECREF(list);
1505 return NULL;
1506}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001507
1508PyDoc_STRVAR(rsplit__doc__,
1509"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1510\n\
1511Return a list of the words in the string S, using sep as the\n\
1512delimiter string, starting at the end of the string and working\n\
1513to the front. If maxsplit is given, at most maxsplit splits are\n\
1514done. If sep is not specified or is None, any whitespace string\n\
1515is a separator.");
1516
1517static PyObject *
1518string_rsplit(PyStringObject *self, PyObject *args)
1519{
1520 int len = PyString_GET_SIZE(self), n, i, j, err;
1521 int maxsplit = -1;
1522 const char *s = PyString_AS_STRING(self), *sub;
1523 PyObject *list, *item, *subobj = Py_None;
1524
1525 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1526 return NULL;
1527 if (maxsplit < 0)
1528 maxsplit = INT_MAX;
1529 if (subobj == Py_None)
1530 return rsplit_whitespace(s, len, maxsplit);
1531 if (PyString_Check(subobj)) {
1532 sub = PyString_AS_STRING(subobj);
1533 n = PyString_GET_SIZE(subobj);
1534 }
1535#ifdef Py_USING_UNICODE
1536 else if (PyUnicode_Check(subobj))
1537 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1538#endif
1539 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1540 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001541
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001542 if (n == 0) {
1543 PyErr_SetString(PyExc_ValueError, "empty separator");
1544 return NULL;
1545 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001546 else if (n == 1)
1547 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001548
1549 list = PyList_New(0);
1550 if (list == NULL)
1551 return NULL;
1552
1553 j = len;
1554 i = j - n;
1555 while (i >= 0) {
1556 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1557 if (maxsplit-- <= 0)
1558 break;
1559 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1560 if (item == NULL)
1561 goto fail;
1562 err = PyList_Insert(list, 0, item);
1563 Py_DECREF(item);
1564 if (err < 0)
1565 goto fail;
1566 j = i;
1567 i -= n;
1568 }
1569 else
1570 i--;
1571 }
1572 item = PyString_FromStringAndSize(s, j);
1573 if (item == NULL)
1574 goto fail;
1575 err = PyList_Insert(list, 0, item);
1576 Py_DECREF(item);
1577 if (err < 0)
1578 goto fail;
1579
1580 return list;
1581
1582 fail:
1583 Py_DECREF(list);
1584 return NULL;
1585}
1586
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001588PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589"S.join(sequence) -> string\n\
1590\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001591Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001592sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593
1594static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001595string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596{
1597 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001598 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600 char *p;
1601 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001602 size_t sz = 0;
1603 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001604 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001605
Tim Peters19fe14e2001-01-19 03:03:47 +00001606 seq = PySequence_Fast(orig, "");
1607 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001608 if (PyErr_ExceptionMatches(PyExc_TypeError))
1609 PyErr_Format(PyExc_TypeError,
1610 "sequence expected, %.80s found",
1611 orig->ob_type->tp_name);
1612 return NULL;
1613 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001614
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001615 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001616 if (seqlen == 0) {
1617 Py_DECREF(seq);
1618 return PyString_FromString("");
1619 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001621 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001622 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1623 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001624 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001625 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001626 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001628
Raymond Hettinger674f2412004-08-23 23:23:54 +00001629 /* There are at least two things to join, or else we have a subclass
1630 * of the builtin types in the sequence.
1631 * Do a pre-pass to figure out the total amount of space we'll
1632 * need (sz), see whether any argument is absurd, and defer to
1633 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001634 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001635 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001636 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001637 item = PySequence_Fast_GET_ITEM(seq, i);
1638 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001639#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001640 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001641 /* Defer to Unicode join.
1642 * CAUTION: There's no gurantee that the
1643 * original sequence can be iterated over
1644 * again, so we must pass seq here.
1645 */
1646 PyObject *result;
1647 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001648 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001649 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001650 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001651#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001652 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001653 "sequence item %i: expected string,"
1654 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001655 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001656 Py_DECREF(seq);
1657 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001658 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001659 sz += PyString_GET_SIZE(item);
1660 if (i != 0)
1661 sz += seplen;
1662 if (sz < old_sz || sz > INT_MAX) {
1663 PyErr_SetString(PyExc_OverflowError,
1664 "join() is too long for a Python string");
1665 Py_DECREF(seq);
1666 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001667 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001668 }
1669
1670 /* Allocate result space. */
1671 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1672 if (res == NULL) {
1673 Py_DECREF(seq);
1674 return NULL;
1675 }
1676
1677 /* Catenate everything. */
1678 p = PyString_AS_STRING(res);
1679 for (i = 0; i < seqlen; ++i) {
1680 size_t n;
1681 item = PySequence_Fast_GET_ITEM(seq, i);
1682 n = PyString_GET_SIZE(item);
1683 memcpy(p, PyString_AS_STRING(item), n);
1684 p += n;
1685 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001686 memcpy(p, sep, seplen);
1687 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001688 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001690
Jeremy Hylton49048292000-07-11 03:28:17 +00001691 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001693}
1694
Tim Peters52e155e2001-06-16 05:42:57 +00001695PyObject *
1696_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001697{
Tim Petersa7259592001-06-16 05:11:17 +00001698 assert(sep != NULL && PyString_Check(sep));
1699 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001700 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001701}
1702
/* Normalize a (start, end) pair against a string of length `len`.
 *
 * *end is capped at len; a negative *end or *start has len added and is
 * then floored at 0.  Note that *start is never capped at len.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int bound;

	/* Upper bound first, stored before *start is looked at. */
	bound = *end;
	if (bound > len) {
		bound = len;
	}
	else if (bound < 0) {
		bound += len;
	}
	if (bound < 0) {
		bound = 0;
	}
	*end = bound;

	/* Lower bound: relative-to-end when negative, floored at 0. */
	bound = *start;
	if (bound < 0) {
		bound += len;
		if (bound < 0)
			bound = 0;
	}
	*start = bound;
}
1717
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718static long
Fred Drakeba096332000-07-09 07:04:36 +00001719string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001721 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722 int len = PyString_GET_SIZE(self);
1723 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001724 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001726 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001727 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001728 return -2;
1729 if (PyString_Check(subobj)) {
1730 sub = PyString_AS_STRING(subobj);
1731 n = PyString_GET_SIZE(subobj);
1732 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001733#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001735 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001736#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738 return -2;
1739
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001740 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741
Guido van Rossum4c08d552000-03-10 22:55:18 +00001742 if (dir > 0) {
1743 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 last -= n;
1746 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001747 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 return (long)i;
1749 }
1750 else {
1751 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001752
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753 if (n == 0 && i <= last)
1754 return (long)last;
1755 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001756 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001757 return (long)j;
1758 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001759
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760 return -1;
1761}
1762
1763
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001764PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765"S.find(sub [,start [,end]]) -> int\n\
1766\n\
1767Return the lowest index in S where substring sub is found,\n\
1768such that sub is contained within s[start,end]. Optional\n\
1769arguments start and end are interpreted as in slice notation.\n\
1770\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001771Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772
1773static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001774string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001776 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777 if (result == -2)
1778 return NULL;
1779 return PyInt_FromLong(result);
1780}
1781
1782
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001783PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784"S.index(sub [,start [,end]]) -> int\n\
1785\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787
1788static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001789string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001791 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792 if (result == -2)
1793 return NULL;
1794 if (result == -1) {
1795 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001796 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797 return NULL;
1798 }
1799 return PyInt_FromLong(result);
1800}
1801
1802
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001803PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804"S.rfind(sub [,start [,end]]) -> int\n\
1805\n\
1806Return the highest index in S where substring sub is found,\n\
1807such that sub is contained within s[start,end]. Optional\n\
1808arguments start and end are interpreted as in slice notation.\n\
1809\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001810Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811
1812static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001813string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001815 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816 if (result == -2)
1817 return NULL;
1818 return PyInt_FromLong(result);
1819}
1820
1821
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001822PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823"S.rindex(sub [,start [,end]]) -> int\n\
1824\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001825Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826
1827static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001828string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001830 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 if (result == -2)
1832 return NULL;
1833 if (result == -1) {
1834 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001835 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836 return NULL;
1837 }
1838 return PyInt_FromLong(result);
1839}
1840
1841
1842static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001843do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1844{
1845 char *s = PyString_AS_STRING(self);
1846 int len = PyString_GET_SIZE(self);
1847 char *sep = PyString_AS_STRING(sepobj);
1848 int seplen = PyString_GET_SIZE(sepobj);
1849 int i, j;
1850
1851 i = 0;
1852 if (striptype != RIGHTSTRIP) {
1853 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1854 i++;
1855 }
1856 }
1857
1858 j = len;
1859 if (striptype != LEFTSTRIP) {
1860 do {
1861 j--;
1862 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1863 j++;
1864 }
1865
1866 if (i == 0 && j == len && PyString_CheckExact(self)) {
1867 Py_INCREF(self);
1868 return (PyObject*)self;
1869 }
1870 else
1871 return PyString_FromStringAndSize(s+i, j-i);
1872}
1873
1874
1875static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001876do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877{
1878 char *s = PyString_AS_STRING(self);
1879 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001881 i = 0;
1882 if (striptype != RIGHTSTRIP) {
1883 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1884 i++;
1885 }
1886 }
1887
1888 j = len;
1889 if (striptype != LEFTSTRIP) {
1890 do {
1891 j--;
1892 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1893 j++;
1894 }
1895
Tim Peters8fa5dd02001-09-12 02:18:30 +00001896 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897 Py_INCREF(self);
1898 return (PyObject*)self;
1899 }
1900 else
1901 return PyString_FromStringAndSize(s+i, j-i);
1902}
1903
1904
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001905static PyObject *
1906do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1907{
1908 PyObject *sep = NULL;
1909
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001910 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001911 return NULL;
1912
1913 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001914 if (PyString_Check(sep))
1915 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001916#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001917 else if (PyUnicode_Check(sep)) {
1918 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1919 PyObject *res;
1920 if (uniself==NULL)
1921 return NULL;
1922 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1923 striptype, sep);
1924 Py_DECREF(uniself);
1925 return res;
1926 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001927#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001928 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001929 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001930#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001931 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001932#else
1933 "%s arg must be None or str",
1934#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001935 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001936 return NULL;
1937 }
1938 return do_xstrip(self, striptype, sep);
1939 }
1940
1941 return do_strip(self, striptype);
1942}
1943
1944
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001945PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001946"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947\n\
1948Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001949whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001950If chars is given and not None, remove characters in chars instead.\n\
1951If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952
1953static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001954string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001956 if (PyTuple_GET_SIZE(args) == 0)
1957 return do_strip(self, BOTHSTRIP); /* Common case */
1958 else
1959 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960}
1961
1962
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001963PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001964"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001966Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001967If chars is given and not None, remove characters in chars instead.\n\
1968If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969
1970static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001971string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001973 if (PyTuple_GET_SIZE(args) == 0)
1974 return do_strip(self, LEFTSTRIP); /* Common case */
1975 else
1976 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977}
1978
1979
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001980PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001981"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001983Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001984If chars is given and not None, remove characters in chars instead.\n\
1985If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986
1987static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001988string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001990 if (PyTuple_GET_SIZE(args) == 0)
1991 return do_strip(self, RIGHTSTRIP); /* Common case */
1992 else
1993 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994}
1995
1996
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001997PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998"S.lower() -> string\n\
1999\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002000Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001
2002static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002003string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004{
2005 char *s = PyString_AS_STRING(self), *s_new;
2006 int i, n = PyString_GET_SIZE(self);
2007 PyObject *new;
2008
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009 new = PyString_FromStringAndSize(NULL, n);
2010 if (new == NULL)
2011 return NULL;
2012 s_new = PyString_AsString(new);
2013 for (i = 0; i < n; i++) {
2014 int c = Py_CHARMASK(*s++);
2015 if (isupper(c)) {
2016 *s_new = tolower(c);
2017 } else
2018 *s_new = c;
2019 s_new++;
2020 }
2021 return new;
2022}
2023
2024
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002025PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026"S.upper() -> string\n\
2027\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002028Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029
2030static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002031string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032{
2033 char *s = PyString_AS_STRING(self), *s_new;
2034 int i, n = PyString_GET_SIZE(self);
2035 PyObject *new;
2036
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037 new = PyString_FromStringAndSize(NULL, n);
2038 if (new == NULL)
2039 return NULL;
2040 s_new = PyString_AsString(new);
2041 for (i = 0; i < n; i++) {
2042 int c = Py_CHARMASK(*s++);
2043 if (islower(c)) {
2044 *s_new = toupper(c);
2045 } else
2046 *s_new = c;
2047 s_new++;
2048 }
2049 return new;
2050}
2051
2052
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002053PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002054"S.title() -> string\n\
2055\n\
2056Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002057characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002058
2059static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002060string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061{
2062 char *s = PyString_AS_STRING(self), *s_new;
2063 int i, n = PyString_GET_SIZE(self);
2064 int previous_is_cased = 0;
2065 PyObject *new;
2066
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067 new = PyString_FromStringAndSize(NULL, n);
2068 if (new == NULL)
2069 return NULL;
2070 s_new = PyString_AsString(new);
2071 for (i = 0; i < n; i++) {
2072 int c = Py_CHARMASK(*s++);
2073 if (islower(c)) {
2074 if (!previous_is_cased)
2075 c = toupper(c);
2076 previous_is_cased = 1;
2077 } else if (isupper(c)) {
2078 if (previous_is_cased)
2079 c = tolower(c);
2080 previous_is_cased = 1;
2081 } else
2082 previous_is_cased = 0;
2083 *s_new++ = c;
2084 }
2085 return new;
2086}
2087
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002088PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089"S.capitalize() -> string\n\
2090\n\
2091Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002092capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093
2094static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002095string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096{
2097 char *s = PyString_AS_STRING(self), *s_new;
2098 int i, n = PyString_GET_SIZE(self);
2099 PyObject *new;
2100
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101 new = PyString_FromStringAndSize(NULL, n);
2102 if (new == NULL)
2103 return NULL;
2104 s_new = PyString_AsString(new);
2105 if (0 < n) {
2106 int c = Py_CHARMASK(*s++);
2107 if (islower(c))
2108 *s_new = toupper(c);
2109 else
2110 *s_new = c;
2111 s_new++;
2112 }
2113 for (i = 1; i < n; i++) {
2114 int c = Py_CHARMASK(*s++);
2115 if (isupper(c))
2116 *s_new = tolower(c);
2117 else
2118 *s_new = c;
2119 s_new++;
2120 }
2121 return new;
2122}
2123
2124
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002125PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126"S.count(sub[, start[, end]]) -> int\n\
2127\n\
2128Return the number of occurrences of substring sub in string\n\
2129S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002130interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131
2132static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002133string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002135 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136 int len = PyString_GET_SIZE(self), n;
2137 int i = 0, last = INT_MAX;
2138 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002139 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140
Guido van Rossumc6821402000-05-08 14:08:05 +00002141 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2142 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002144
Guido van Rossum4c08d552000-03-10 22:55:18 +00002145 if (PyString_Check(subobj)) {
2146 sub = PyString_AS_STRING(subobj);
2147 n = PyString_GET_SIZE(subobj);
2148 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002149#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002150 else if (PyUnicode_Check(subobj)) {
2151 int count;
2152 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2153 if (count == -1)
2154 return NULL;
2155 else
2156 return PyInt_FromLong((long) count);
2157 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002158#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002159 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2160 return NULL;
2161
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002162 string_adjust_indices(&i, &last, len);
2163
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164 m = last + 1 - n;
2165 if (n == 0)
2166 return PyInt_FromLong((long) (m-i));
2167
2168 r = 0;
2169 while (i < m) {
2170 if (!memcmp(s+i, sub, n)) {
2171 r++;
2172 i += n;
2173 } else {
2174 i++;
2175 }
2176 }
2177 return PyInt_FromLong((long) r);
2178}
2179
2180
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002181PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002182"S.swapcase() -> string\n\
2183\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002185converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002186
2187static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002188string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189{
2190 char *s = PyString_AS_STRING(self), *s_new;
2191 int i, n = PyString_GET_SIZE(self);
2192 PyObject *new;
2193
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194 new = PyString_FromStringAndSize(NULL, n);
2195 if (new == NULL)
2196 return NULL;
2197 s_new = PyString_AsString(new);
2198 for (i = 0; i < n; i++) {
2199 int c = Py_CHARMASK(*s++);
2200 if (islower(c)) {
2201 *s_new = toupper(c);
2202 }
2203 else if (isupper(c)) {
2204 *s_new = tolower(c);
2205 }
2206 else
2207 *s_new = c;
2208 s_new++;
2209 }
2210 return new;
2211}
2212
2213
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002214PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215"S.translate(table [,deletechars]) -> string\n\
2216\n\
2217Return a copy of the string S, where all characters occurring\n\
2218in the optional argument deletechars are removed, and the\n\
2219remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002220translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221
2222static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002223string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002225 register char *input, *output;
2226 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 register int i, c, changed = 0;
2228 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002229 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 int inlen, tablen, dellen = 0;
2231 PyObject *result;
2232 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002233 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002235 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002238
2239 if (PyString_Check(tableobj)) {
2240 table1 = PyString_AS_STRING(tableobj);
2241 tablen = PyString_GET_SIZE(tableobj);
2242 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002243#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002244 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002245 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246 parameter; instead a mapping to None will cause characters
2247 to be deleted. */
2248 if (delobj != NULL) {
2249 PyErr_SetString(PyExc_TypeError,
2250 "deletions are implemented differently for unicode");
2251 return NULL;
2252 }
2253 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2254 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002255#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002258
Martin v. Löwis00b61272002-12-12 20:03:19 +00002259 if (tablen != 256) {
2260 PyErr_SetString(PyExc_ValueError,
2261 "translation table must be 256 characters long");
2262 return NULL;
2263 }
2264
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265 if (delobj != NULL) {
2266 if (PyString_Check(delobj)) {
2267 del_table = PyString_AS_STRING(delobj);
2268 dellen = PyString_GET_SIZE(delobj);
2269 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002270#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002271 else if (PyUnicode_Check(delobj)) {
2272 PyErr_SetString(PyExc_TypeError,
2273 "deletions are implemented differently for unicode");
2274 return NULL;
2275 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002276#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002277 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2278 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279 }
2280 else {
2281 del_table = NULL;
2282 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002283 }
2284
2285 table = table1;
2286 inlen = PyString_Size(input_obj);
2287 result = PyString_FromStringAndSize((char *)NULL, inlen);
2288 if (result == NULL)
2289 return NULL;
2290 output_start = output = PyString_AsString(result);
2291 input = PyString_AsString(input_obj);
2292
2293 if (dellen == 0) {
2294 /* If no deletions are required, use faster code */
2295 for (i = inlen; --i >= 0; ) {
2296 c = Py_CHARMASK(*input++);
2297 if (Py_CHARMASK((*output++ = table[c])) != c)
2298 changed = 1;
2299 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002300 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301 return result;
2302 Py_DECREF(result);
2303 Py_INCREF(input_obj);
2304 return input_obj;
2305 }
2306
2307 for (i = 0; i < 256; i++)
2308 trans_table[i] = Py_CHARMASK(table[i]);
2309
2310 for (i = 0; i < dellen; i++)
2311 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2312
2313 for (i = inlen; --i >= 0; ) {
2314 c = Py_CHARMASK(*input++);
2315 if (trans_table[c] != -1)
2316 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2317 continue;
2318 changed = 1;
2319 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002320 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 Py_DECREF(result);
2322 Py_INCREF(input_obj);
2323 return input_obj;
2324 }
2325 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002326 if (inlen > 0)
2327 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002328 return result;
2329}
2330
2331
2332/* What follows is used for implementing replace(). Perry Stoll. */
2333
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1 if
  there is none.  If PAT is longer than MEM the result is -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot start in the last pat_len-1 bytes of mem. */
	const int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] != pat[0])
			continue;
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
2359
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match:
      mem=1111  and pat=11 gives 2;
      mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	for (;;) {
		int where, consumed;

		if (len < 0)
			break;
		where = mymemfind(mem, len, pat, pat_len);
		if (where == -1)
			break;
		/* Skip everything up to and including this match. */
		consumed = where + pat_len;
		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
2383
2384/*
2385 mymemreplace
2386
Thomas Wouters7e474022000-07-16 12:04:32 +00002387 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388 replaced with SUB.
2389
Thomas Wouters7e474022000-07-16 12:04:32 +00002390 If length of PAT is greater than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391 of PAT in STR, then the original string is returned. Otherwise, a new
2392 string is allocated here and returned.
2393
2394 on return, out_len is:
2395 the length of output string, or
2396 -1 if the input string is returned, or
2397 unchanged if an error occurs (no memory).
2398
2399 return value is:
2400 the new string allocated locally, or
2401 NULL if an error occurred.
2402*/
2403static char *
Tim Petersc2e7da92000-07-09 08:02:21 +00002404mymemreplace(const char *str, int len, /* input string */
2405 const char *pat, int pat_len, /* pattern string to find */
2406 const char *sub, int sub_len, /* substitution string */
2407 int count, /* number of replacements */
Tim Peters4cd44ef2001-05-10 00:05:33 +00002408 int *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409{
2410 char *out_s;
2411 char *new_s;
2412 int nfound, offset, new_len;
2413
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002414 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415 goto return_same;
2416
2417 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002418 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002419 if (count < 0)
2420 count = INT_MAX;
2421 else if (nfound > count)
2422 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423 if (nfound == 0)
2424 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002425
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002427 if (new_len == 0) {
2428 /* Have to allocate something for the caller to free(). */
2429 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002430 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002431 return NULL;
2432 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002434 else {
2435 assert(new_len > 0);
2436 new_s = (char *)PyMem_MALLOC(new_len);
2437 if (new_s == NULL)
2438 return NULL;
2439 out_s = new_s;
2440
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002441 if (pat_len > 0) {
2442 for (; nfound > 0; --nfound) {
2443 /* find index of next instance of pattern */
2444 offset = mymemfind(str, len, pat, pat_len);
2445 if (offset == -1)
2446 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002447
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002448 /* copy non matching part of input string */
2449 memcpy(new_s, str, offset);
2450 str += offset + pat_len;
2451 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002452
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002453 /* copy substitute into the output string */
2454 new_s += offset;
2455 memcpy(new_s, sub, sub_len);
2456 new_s += sub_len;
2457 }
2458 /* copy any remaining values into output string */
2459 if (len > 0)
2460 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002461 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002462 else {
2463 for (;;++str, --len) {
2464 memcpy(new_s, sub, sub_len);
2465 new_s += sub_len;
2466 if (--nfound <= 0) {
2467 memcpy(new_s, str, len);
2468 break;
2469 }
2470 *new_s++ = *str;
2471 }
2472 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002473 }
2474 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475 return out_s;
2476
2477 return_same:
2478 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002479 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480}
2481
2482
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002483PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002484"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002485\n\
2486Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002487old replaced by new. If the optional argument count is\n\
2488given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002489
2490static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002491string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002492{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002493 const char *str = PyString_AS_STRING(self), *sub, *repl;
2494 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002495 const int len = PyString_GET_SIZE(self);
2496 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002497 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002498 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002499 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002500
Guido van Rossum4c08d552000-03-10 22:55:18 +00002501 if (!PyArg_ParseTuple(args, "OO|i:replace",
2502 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002503 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504
2505 if (PyString_Check(subobj)) {
2506 sub = PyString_AS_STRING(subobj);
2507 sub_len = PyString_GET_SIZE(subobj);
2508 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002509#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002510 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002511 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002512 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002513#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002514 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2515 return NULL;
2516
2517 if (PyString_Check(replobj)) {
2518 repl = PyString_AS_STRING(replobj);
2519 repl_len = PyString_GET_SIZE(replobj);
2520 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002521#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002523 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002524 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002525#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2527 return NULL;
2528
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002530 if (new_s == NULL) {
2531 PyErr_NoMemory();
2532 return NULL;
2533 }
2534 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002535 if (PyString_CheckExact(self)) {
2536 /* we're returning another reference to self */
2537 new = (PyObject*)self;
2538 Py_INCREF(new);
2539 }
2540 else {
2541 new = PyString_FromStringAndSize(str, len);
2542 if (new == NULL)
2543 return NULL;
2544 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002545 }
2546 else {
2547 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002548 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002549 }
2550 return new;
2551}
2552
2553
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002554PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002555"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002556\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002557Return True if S starts with the specified prefix, False otherwise.\n\
2558With optional start, test S beginning at that position.\n\
2559With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002560
2561static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002562string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002564 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002566 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002567 int plen;
2568 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002569 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002570 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002571
Guido van Rossumc6821402000-05-08 14:08:05 +00002572 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2573 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002574 return NULL;
2575 if (PyString_Check(subobj)) {
2576 prefix = PyString_AS_STRING(subobj);
2577 plen = PyString_GET_SIZE(subobj);
2578 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002579#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002580 else if (PyUnicode_Check(subobj)) {
2581 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002582 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002583 subobj, start, end, -1);
2584 if (rc == -1)
2585 return NULL;
2586 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002587 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002588 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002589#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002591 return NULL;
2592
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002593 string_adjust_indices(&start, &end, len);
2594
2595 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002596 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002597
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002598 if (end-start >= plen)
2599 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2600 else
2601 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002602}
2603
2604
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002605PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002606"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002607\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002608Return True if S ends with the specified suffix, False otherwise.\n\
2609With optional start, test S beginning at that position.\n\
2610With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002611
2612static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002613string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002614{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002615 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002616 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617 const char* suffix;
2618 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002619 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002620 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002621 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002622
Guido van Rossumc6821402000-05-08 14:08:05 +00002623 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2624 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002625 return NULL;
2626 if (PyString_Check(subobj)) {
2627 suffix = PyString_AS_STRING(subobj);
2628 slen = PyString_GET_SIZE(subobj);
2629 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002630#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002631 else if (PyUnicode_Check(subobj)) {
2632 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002633 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002634 subobj, start, end, +1);
2635 if (rc == -1)
2636 return NULL;
2637 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002638 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002639 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002640#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002642 return NULL;
2643
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002644 string_adjust_indices(&start, &end, len);
2645
2646 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002647 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002648
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002649 if (end-slen > start)
2650 start = end - slen;
2651 if (end-start >= slen)
2652 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2653 else
2654 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002655}
2656
2657
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002658PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002659"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002660\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002661Encodes S using the codec registered for encoding. encoding defaults\n\
2662to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002663handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002664a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2665'xmlcharrefreplace' as well as any other name registered with\n\
2666codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002667
2668static PyObject *
2669string_encode(PyStringObject *self, PyObject *args)
2670{
2671 char *encoding = NULL;
2672 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002673 PyObject *v;
2674
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002675 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2676 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002677 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002678 if (v == NULL)
2679 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002680 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2681 PyErr_Format(PyExc_TypeError,
2682 "encoder did not return a string/unicode object "
2683 "(type=%.400s)",
2684 v->ob_type->tp_name);
2685 Py_DECREF(v);
2686 return NULL;
2687 }
2688 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002689
2690 onError:
2691 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002692}
2693
2694
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002695PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002696"S.decode([encoding[,errors]]) -> object\n\
2697\n\
2698Decodes S using the codec registered for encoding. encoding defaults\n\
2699to the default encoding. errors may be given to set a different error\n\
2700handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002701a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2702as well as any other name registerd with codecs.register_error that is\n\
2703able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002704
2705static PyObject *
2706string_decode(PyStringObject *self, PyObject *args)
2707{
2708 char *encoding = NULL;
2709 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002710 PyObject *v;
2711
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002712 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2713 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002714 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002715 if (v == NULL)
2716 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002717 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2718 PyErr_Format(PyExc_TypeError,
2719 "decoder did not return a string/unicode object "
2720 "(type=%.400s)",
2721 v->ob_type->tp_name);
2722 Py_DECREF(v);
2723 return NULL;
2724 }
2725 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002726
2727 onError:
2728 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002729}
2730
2731
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002732PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002733"S.expandtabs([tabsize]) -> string\n\
2734\n\
2735Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002736If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002737
2738static PyObject*
2739string_expandtabs(PyStringObject *self, PyObject *args)
2740{
2741 const char *e, *p;
2742 char *q;
2743 int i, j;
2744 PyObject *u;
2745 int tabsize = 8;
2746
2747 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2748 return NULL;
2749
Thomas Wouters7e474022000-07-16 12:04:32 +00002750 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002751 i = j = 0;
2752 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2753 for (p = PyString_AS_STRING(self); p < e; p++)
2754 if (*p == '\t') {
2755 if (tabsize > 0)
2756 j += tabsize - (j % tabsize);
2757 }
2758 else {
2759 j++;
2760 if (*p == '\n' || *p == '\r') {
2761 i += j;
2762 j = 0;
2763 }
2764 }
2765
2766 /* Second pass: create output string and fill it */
2767 u = PyString_FromStringAndSize(NULL, i + j);
2768 if (!u)
2769 return NULL;
2770
2771 j = 0;
2772 q = PyString_AS_STRING(u);
2773
2774 for (p = PyString_AS_STRING(self); p < e; p++)
2775 if (*p == '\t') {
2776 if (tabsize > 0) {
2777 i = tabsize - (j % tabsize);
2778 j += i;
2779 while (i--)
2780 *q++ = ' ';
2781 }
2782 }
2783 else {
2784 j++;
2785 *q++ = *p;
2786 if (*p == '\n' || *p == '\r')
2787 j = 0;
2788 }
2789
2790 return u;
2791}
2792
Tim Peters8fa5dd02001-09-12 02:18:30 +00002793static PyObject *
2794pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002795{
2796 PyObject *u;
2797
2798 if (left < 0)
2799 left = 0;
2800 if (right < 0)
2801 right = 0;
2802
Tim Peters8fa5dd02001-09-12 02:18:30 +00002803 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002804 Py_INCREF(self);
2805 return (PyObject *)self;
2806 }
2807
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002808 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002809 left + PyString_GET_SIZE(self) + right);
2810 if (u) {
2811 if (left)
2812 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002813 memcpy(PyString_AS_STRING(u) + left,
2814 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002815 PyString_GET_SIZE(self));
2816 if (right)
2817 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2818 fill, right);
2819 }
2820
2821 return u;
2822}
2823
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002824PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002825"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002826"\n"
2827"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002828"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002829
2830static PyObject *
2831string_ljust(PyStringObject *self, PyObject *args)
2832{
2833 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002834 char fillchar = ' ';
2835
2836 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002837 return NULL;
2838
Tim Peters8fa5dd02001-09-12 02:18:30 +00002839 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002840 Py_INCREF(self);
2841 return (PyObject*) self;
2842 }
2843
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002844 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845}
2846
2847
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002848PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002849"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002850"\n"
2851"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002852"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002853
2854static PyObject *
2855string_rjust(PyStringObject *self, PyObject *args)
2856{
2857 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002858 char fillchar = ' ';
2859
2860 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861 return NULL;
2862
Tim Peters8fa5dd02001-09-12 02:18:30 +00002863 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002864 Py_INCREF(self);
2865 return (PyObject*) self;
2866 }
2867
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002868 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869}
2870
2871
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002872PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002873"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002874"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002875"Return S centered in a string of length width. Padding is\n"
2876"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877
2878static PyObject *
2879string_center(PyStringObject *self, PyObject *args)
2880{
2881 int marg, left;
2882 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002883 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002884
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002885 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886 return NULL;
2887
Tim Peters8fa5dd02001-09-12 02:18:30 +00002888 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002889 Py_INCREF(self);
2890 return (PyObject*) self;
2891 }
2892
2893 marg = width - PyString_GET_SIZE(self);
2894 left = marg / 2 + (marg & width & 1);
2895
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002896 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002897}
2898
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002899PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002900"S.zfill(width) -> string\n"
2901"\n"
2902"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002903"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002904
2905static PyObject *
2906string_zfill(PyStringObject *self, PyObject *args)
2907{
2908 int fill;
2909 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002910 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002911
2912 int width;
2913 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2914 return NULL;
2915
2916 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002917 if (PyString_CheckExact(self)) {
2918 Py_INCREF(self);
2919 return (PyObject*) self;
2920 }
2921 else
2922 return PyString_FromStringAndSize(
2923 PyString_AS_STRING(self),
2924 PyString_GET_SIZE(self)
2925 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002926 }
2927
2928 fill = width - PyString_GET_SIZE(self);
2929
2930 s = pad(self, fill, 0, '0');
2931
2932 if (s == NULL)
2933 return NULL;
2934
2935 p = PyString_AS_STRING(s);
2936 if (p[fill] == '+' || p[fill] == '-') {
2937 /* move sign to beginning of string */
2938 p[0] = p[fill];
2939 p[fill] = '0';
2940 }
2941
2942 return (PyObject*) s;
2943}
2944
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002945PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002946"S.isspace() -> bool\n\
2947\n\
2948Return True if all characters in S are whitespace\n\
2949and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002950
2951static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002952string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002953{
Fred Drakeba096332000-07-09 07:04:36 +00002954 register const unsigned char *p
2955 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002956 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002957
Guido van Rossum4c08d552000-03-10 22:55:18 +00002958 /* Shortcut for single character strings */
2959 if (PyString_GET_SIZE(self) == 1 &&
2960 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002961 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002962
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002963 /* Special case for empty strings */
2964 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002965 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002966
Guido van Rossum4c08d552000-03-10 22:55:18 +00002967 e = p + PyString_GET_SIZE(self);
2968 for (; p < e; p++) {
2969 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002970 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002971 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002972 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002973}
2974
2975
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002976PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002977"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002978\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002979Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002980and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002981
2982static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002983string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002984{
Fred Drakeba096332000-07-09 07:04:36 +00002985 register const unsigned char *p
2986 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002987 register const unsigned char *e;
2988
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002989 /* Shortcut for single character strings */
2990 if (PyString_GET_SIZE(self) == 1 &&
2991 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002992 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002993
2994 /* Special case for empty strings */
2995 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002996 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002997
2998 e = p + PyString_GET_SIZE(self);
2999 for (; p < e; p++) {
3000 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003001 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003002 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003003 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003004}
3005
3006
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003007PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003008"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003009\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003010Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003011and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003012
3013static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003014string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003015{
Fred Drakeba096332000-07-09 07:04:36 +00003016 register const unsigned char *p
3017 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003018 register const unsigned char *e;
3019
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003020 /* Shortcut for single character strings */
3021 if (PyString_GET_SIZE(self) == 1 &&
3022 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003023 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003024
3025 /* Special case for empty strings */
3026 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003027 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003028
3029 e = p + PyString_GET_SIZE(self);
3030 for (; p < e; p++) {
3031 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003032 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003033 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003034 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003035}
3036
3037
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003038PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003039"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003040\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003041Return True if all characters in S are digits\n\
3042and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003043
3044static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003045string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003046{
Fred Drakeba096332000-07-09 07:04:36 +00003047 register const unsigned char *p
3048 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003049 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003050
Guido van Rossum4c08d552000-03-10 22:55:18 +00003051 /* Shortcut for single character strings */
3052 if (PyString_GET_SIZE(self) == 1 &&
3053 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003054 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003055
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003056 /* Special case for empty strings */
3057 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003058 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003059
Guido van Rossum4c08d552000-03-10 22:55:18 +00003060 e = p + PyString_GET_SIZE(self);
3061 for (; p < e; p++) {
3062 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003063 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003064 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003065 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003066}
3067
3068
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003069PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003070"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003072Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003073at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003074
3075static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003076string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003077{
Fred Drakeba096332000-07-09 07:04:36 +00003078 register const unsigned char *p
3079 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003080 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003081 int cased;
3082
Guido van Rossum4c08d552000-03-10 22:55:18 +00003083 /* Shortcut for single character strings */
3084 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003085 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003087 /* Special case for empty strings */
3088 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003089 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003090
Guido van Rossum4c08d552000-03-10 22:55:18 +00003091 e = p + PyString_GET_SIZE(self);
3092 cased = 0;
3093 for (; p < e; p++) {
3094 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003095 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003096 else if (!cased && islower(*p))
3097 cased = 1;
3098 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003099 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003100}
3101
3102
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003103PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003104"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003105\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003106Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003107at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003108
3109static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003110string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111{
Fred Drakeba096332000-07-09 07:04:36 +00003112 register const unsigned char *p
3113 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003114 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003115 int cased;
3116
Guido van Rossum4c08d552000-03-10 22:55:18 +00003117 /* Shortcut for single character strings */
3118 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003119 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003120
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003121 /* Special case for empty strings */
3122 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003123 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003124
Guido van Rossum4c08d552000-03-10 22:55:18 +00003125 e = p + PyString_GET_SIZE(self);
3126 cased = 0;
3127 for (; p < e; p++) {
3128 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003129 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003130 else if (!cased && isupper(*p))
3131 cased = 1;
3132 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003133 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003134}
3135
3136
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003137PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003138"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003139\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003140Return True if S is a titlecased string and there is at least one\n\
3141character in S, i.e. uppercase characters may only follow uncased\n\
3142characters and lowercase characters only cased ones. Return False\n\
3143otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003144
3145static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003146string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147{
Fred Drakeba096332000-07-09 07:04:36 +00003148 register const unsigned char *p
3149 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003150 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003151 int cased, previous_is_cased;
3152
Guido van Rossum4c08d552000-03-10 22:55:18 +00003153 /* Shortcut for single character strings */
3154 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003155 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003156
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003157 /* Special case for empty strings */
3158 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003159 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003160
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 e = p + PyString_GET_SIZE(self);
3162 cased = 0;
3163 previous_is_cased = 0;
3164 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003165 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003166
3167 if (isupper(ch)) {
3168 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003169 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003170 previous_is_cased = 1;
3171 cased = 1;
3172 }
3173 else if (islower(ch)) {
3174 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003175 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176 previous_is_cased = 1;
3177 cased = 1;
3178 }
3179 else
3180 previous_is_cased = 0;
3181 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003182 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003183}
3184
3185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003186PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003187"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003188\n\
3189Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003190Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003191is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003192
Guido van Rossum4c08d552000-03-10 22:55:18 +00003193static PyObject*
3194string_splitlines(PyStringObject *self, PyObject *args)
3195{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196 register int i;
3197 register int j;
3198 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003199 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003200 PyObject *list;
3201 PyObject *str;
3202 char *data;
3203
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003204 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003205 return NULL;
3206
3207 data = PyString_AS_STRING(self);
3208 len = PyString_GET_SIZE(self);
3209
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210 list = PyList_New(0);
3211 if (!list)
3212 goto onError;
3213
3214 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003215 int eol;
3216
Guido van Rossum4c08d552000-03-10 22:55:18 +00003217 /* Find a line and append it */
3218 while (i < len && data[i] != '\n' && data[i] != '\r')
3219 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003220
3221 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003222 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003223 if (i < len) {
3224 if (data[i] == '\r' && i + 1 < len &&
3225 data[i+1] == '\n')
3226 i += 2;
3227 else
3228 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003229 if (keepends)
3230 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003231 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003232 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003233 j = i;
3234 }
3235 if (j < len) {
3236 SPLIT_APPEND(data, j, len);
3237 }
3238
3239 return list;
3240
3241 onError:
3242 Py_DECREF(list);
3243 return NULL;
3244}
3245
3246#undef SPLIT_APPEND
3247
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003248static PyObject *
3249string_getnewargs(PyStringObject *v)
3250{
3251 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3252}
3253
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003254
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003255static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003256string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003257 /* Counterparts of the obsolete stropmodule functions; except
3258 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003259 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3260 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003261 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003262 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3263 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003264 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3265 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3266 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3267 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3268 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3269 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3270 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003271 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3272 capitalize__doc__},
3273 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3274 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3275 endswith__doc__},
3276 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3277 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3278 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3279 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3280 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3281 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3282 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3283 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3284 startswith__doc__},
3285 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3286 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3287 swapcase__doc__},
3288 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3289 translate__doc__},
3290 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3291 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3292 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3293 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3294 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3295 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3296 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3297 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3298 expandtabs__doc__},
3299 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3300 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003301 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003302 {NULL, NULL} /* sentinel */
3303};
3304
Jeremy Hylton938ace62002-07-17 16:30:39 +00003305static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003306str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3307
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003308static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003309string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003310{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003311 PyObject *x = NULL;
3312 static char *kwlist[] = {"object", 0};
3313
Guido van Rossumae960af2001-08-30 03:11:59 +00003314 if (type != &PyString_Type)
3315 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003316 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3317 return NULL;
3318 if (x == NULL)
3319 return PyString_FromString("");
3320 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003321}
3322
Guido van Rossumae960af2001-08-30 03:11:59 +00003323static PyObject *
3324str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3325{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003326 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003327 int n;
3328
3329 assert(PyType_IsSubtype(type, &PyString_Type));
3330 tmp = string_new(&PyString_Type, args, kwds);
3331 if (tmp == NULL)
3332 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003333 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003334 n = PyString_GET_SIZE(tmp);
3335 pnew = type->tp_alloc(type, n);
3336 if (pnew != NULL) {
3337 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003338 ((PyStringObject *)pnew)->ob_shash =
3339 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003340 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003341 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003342 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003343 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003344}
3345
Guido van Rossumcacfc072002-05-24 19:01:59 +00003346static PyObject *
3347basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3348{
3349 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003350 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003351 return NULL;
3352}
3353
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003354static PyObject *
3355string_mod(PyObject *v, PyObject *w)
3356{
3357 if (!PyString_Check(v)) {
3358 Py_INCREF(Py_NotImplemented);
3359 return Py_NotImplemented;
3360 }
3361 return PyString_Format(v, w);
3362}
3363
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003364PyDoc_STRVAR(basestring_doc,
3365"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003366
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003367static PyNumberMethods string_as_number = {
3368 0, /*nb_add*/
3369 0, /*nb_subtract*/
3370 0, /*nb_multiply*/
3371 0, /*nb_divide*/
3372 string_mod, /*nb_remainder*/
3373};
3374
3375
Guido van Rossumcacfc072002-05-24 19:01:59 +00003376PyTypeObject PyBaseString_Type = {
3377 PyObject_HEAD_INIT(&PyType_Type)
3378 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003379 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003380 0,
3381 0,
3382 0, /* tp_dealloc */
3383 0, /* tp_print */
3384 0, /* tp_getattr */
3385 0, /* tp_setattr */
3386 0, /* tp_compare */
3387 0, /* tp_repr */
3388 0, /* tp_as_number */
3389 0, /* tp_as_sequence */
3390 0, /* tp_as_mapping */
3391 0, /* tp_hash */
3392 0, /* tp_call */
3393 0, /* tp_str */
3394 0, /* tp_getattro */
3395 0, /* tp_setattro */
3396 0, /* tp_as_buffer */
3397 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3398 basestring_doc, /* tp_doc */
3399 0, /* tp_traverse */
3400 0, /* tp_clear */
3401 0, /* tp_richcompare */
3402 0, /* tp_weaklistoffset */
3403 0, /* tp_iter */
3404 0, /* tp_iternext */
3405 0, /* tp_methods */
3406 0, /* tp_members */
3407 0, /* tp_getset */
3408 &PyBaseObject_Type, /* tp_base */
3409 0, /* tp_dict */
3410 0, /* tp_descr_get */
3411 0, /* tp_descr_set */
3412 0, /* tp_dictoffset */
3413 0, /* tp_init */
3414 0, /* tp_alloc */
3415 basestring_new, /* tp_new */
3416 0, /* tp_free */
3417};
3418
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003419PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003420"str(object) -> string\n\
3421\n\
3422Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003423If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003424
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003425PyTypeObject PyString_Type = {
3426 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003427 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003428 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003429 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003430 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003431 (destructor)string_dealloc, /* tp_dealloc */
3432 (printfunc)string_print, /* tp_print */
3433 0, /* tp_getattr */
3434 0, /* tp_setattr */
3435 0, /* tp_compare */
3436 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003437 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003438 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003439 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003440 (hashfunc)string_hash, /* tp_hash */
3441 0, /* tp_call */
3442 (reprfunc)string_str, /* tp_str */
3443 PyObject_GenericGetAttr, /* tp_getattro */
3444 0, /* tp_setattro */
3445 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003446 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3447 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003448 string_doc, /* tp_doc */
3449 0, /* tp_traverse */
3450 0, /* tp_clear */
3451 (richcmpfunc)string_richcompare, /* tp_richcompare */
3452 0, /* tp_weaklistoffset */
3453 0, /* tp_iter */
3454 0, /* tp_iternext */
3455 string_methods, /* tp_methods */
3456 0, /* tp_members */
3457 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003458 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003459 0, /* tp_dict */
3460 0, /* tp_descr_get */
3461 0, /* tp_descr_set */
3462 0, /* tp_dictoffset */
3463 0, /* tp_init */
3464 0, /* tp_alloc */
3465 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003466 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003467};
3468
3469void
Fred Drakeba096332000-07-09 07:04:36 +00003470PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003471{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003472 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003473 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003474 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003475 if (w == NULL || !PyString_Check(*pv)) {
3476 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003477 *pv = NULL;
3478 return;
3479 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003480 v = string_concat((PyStringObject *) *pv, w);
3481 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003482 *pv = v;
3483}
3484
Guido van Rossum013142a1994-08-30 08:19:36 +00003485void
Fred Drakeba096332000-07-09 07:04:36 +00003486PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003487{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003488 PyString_Concat(pv, w);
3489 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003490}
3491
3492
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003493/* The following function breaks the notion that strings are immutable:
3494 it changes the size of a string. We get away with this only if there
3495 is only one module referencing the object. You can also think of it
3496 as creating a new string object and destroying the old one, only
3497 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003498 already be known to some other part of the code...
3499 Note that if there's not enough memory to resize the string, the original
3500 string object at *pv is deallocated, *pv is set to NULL, an "out of
3501 memory" exception is set, and -1 is returned. Else (on success) 0 is
3502 returned, and the value in *pv may or may not be the same as on input.
3503 As always, an extra byte is allocated for a trailing \0 byte (newsize
3504 does *not* include that), and a trailing \0 byte is stored.
3505*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003506
3507int
Fred Drakeba096332000-07-09 07:04:36 +00003508_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003509{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003510 register PyObject *v;
3511 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003512 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003513 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3514 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003515 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003516 Py_DECREF(v);
3517 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003518 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003519 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003520 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003521 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003522 _Py_ForgetReference(v);
3523 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003524 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003525 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003526 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003527 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003528 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003529 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003530 _Py_NewReference(*pv);
3531 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003532 sv->ob_size = newsize;
3533 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003534 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003535 return 0;
3536}
Guido van Rossume5372401993-03-16 12:15:04 +00003537
3538/* Helpers for formatstring */
3539
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003540static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003541getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003542{
3543 int argidx = *p_argidx;
3544 if (argidx < arglen) {
3545 (*p_argidx)++;
3546 if (arglen < 0)
3547 return args;
3548 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003549 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003550 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003551 PyErr_SetString(PyExc_TypeError,
3552 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003553 return NULL;
3554}
3555
Tim Peters38fd5b62000-09-21 05:43:11 +00003556/* Format codes
3557 * F_LJUST '-'
3558 * F_SIGN '+'
3559 * F_BLANK ' '
3560 * F_ALT '#'
3561 * F_ZERO '0'
3562 */
Guido van Rossume5372401993-03-16 12:15:04 +00003563#define F_LJUST (1<<0)
3564#define F_SIGN (1<<1)
3565#define F_BLANK (1<<2)
3566#define F_ALT (1<<3)
3567#define F_ZERO (1<<4)
3568
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003569static int
Fred Drakeba096332000-07-09 07:04:36 +00003570formatfloat(char *buf, size_t buflen, int flags,
3571 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003572{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003573 /* fmt = '%#.' + `prec` + `type`
3574 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003575 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003576 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003577 x = PyFloat_AsDouble(v);
3578 if (x == -1.0 && PyErr_Occurred()) {
3579 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003580 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003581 }
Guido van Rossume5372401993-03-16 12:15:04 +00003582 if (prec < 0)
3583 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003584 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3585 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003586 /* Worst case length calc to ensure no buffer overrun:
3587
3588 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003589 fmt = %#.<prec>g
3590 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003591 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003592 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003593
3594 'f' formats:
3595 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3596 len = 1 + 50 + 1 + prec = 52 + prec
3597
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003598 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003599 always given), therefore increase the length by one.
3600
3601 */
3602 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3603 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003604 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003605 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003606 return -1;
3607 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003608 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3609 (flags&F_ALT) ? "#" : "",
3610 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003611 PyOS_ascii_formatd(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003612 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003613}
3614
Tim Peters38fd5b62000-09-21 05:43:11 +00003615/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3616 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3617 * Python's regular ints.
3618 * Return value: a new PyString*, or NULL if error.
3619 * . *pbuf is set to point into it,
3620 * *plen set to the # of chars following that.
3621 * Caller must decref it when done using pbuf.
3622 * The string starting at *pbuf is of the form
3623 * "-"? ("0x" | "0X")? digit+
3624 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003625 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003626 * There will be at least prec digits, zero-filled on the left if
3627 * necessary to get that many.
3628 * val object to be converted
3629 * flags bitmask of format flags; only F_ALT is looked at
3630 * prec minimum number of digits; 0-fill on left if needed
3631 * type a character in [duoxX]; u acts the same as d
3632 *
3633 * CAUTION: o, x and X conversions on regular ints can never
3634 * produce a '-' sign, but can for Python's unbounded ints.
3635 */
3636PyObject*
3637_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3638 char **pbuf, int *plen)
3639{
3640 PyObject *result = NULL;
3641 char *buf;
3642 int i;
3643 int sign; /* 1 if '-', else 0 */
3644 int len; /* number of characters */
3645 int numdigits; /* len == numnondigits + numdigits */
3646 int numnondigits = 0;
3647
3648 switch (type) {
3649 case 'd':
3650 case 'u':
3651 result = val->ob_type->tp_str(val);
3652 break;
3653 case 'o':
3654 result = val->ob_type->tp_as_number->nb_oct(val);
3655 break;
3656 case 'x':
3657 case 'X':
3658 numnondigits = 2;
3659 result = val->ob_type->tp_as_number->nb_hex(val);
3660 break;
3661 default:
3662 assert(!"'type' not in [duoxX]");
3663 }
3664 if (!result)
3665 return NULL;
3666
3667 /* To modify the string in-place, there can only be one reference. */
3668 if (result->ob_refcnt != 1) {
3669 PyErr_BadInternalCall();
3670 return NULL;
3671 }
3672 buf = PyString_AsString(result);
3673 len = PyString_Size(result);
3674 if (buf[len-1] == 'L') {
3675 --len;
3676 buf[len] = '\0';
3677 }
3678 sign = buf[0] == '-';
3679 numnondigits += sign;
3680 numdigits = len - numnondigits;
3681 assert(numdigits > 0);
3682
Tim Petersfff53252001-04-12 18:38:48 +00003683 /* Get rid of base marker unless F_ALT */
3684 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003685 /* Need to skip 0x, 0X or 0. */
3686 int skipped = 0;
3687 switch (type) {
3688 case 'o':
3689 assert(buf[sign] == '0');
3690 /* If 0 is only digit, leave it alone. */
3691 if (numdigits > 1) {
3692 skipped = 1;
3693 --numdigits;
3694 }
3695 break;
3696 case 'x':
3697 case 'X':
3698 assert(buf[sign] == '0');
3699 assert(buf[sign + 1] == 'x');
3700 skipped = 2;
3701 numnondigits -= 2;
3702 break;
3703 }
3704 if (skipped) {
3705 buf += skipped;
3706 len -= skipped;
3707 if (sign)
3708 buf[0] = '-';
3709 }
3710 assert(len == numnondigits + numdigits);
3711 assert(numdigits > 0);
3712 }
3713
3714 /* Fill with leading zeroes to meet minimum width. */
3715 if (prec > numdigits) {
3716 PyObject *r1 = PyString_FromStringAndSize(NULL,
3717 numnondigits + prec);
3718 char *b1;
3719 if (!r1) {
3720 Py_DECREF(result);
3721 return NULL;
3722 }
3723 b1 = PyString_AS_STRING(r1);
3724 for (i = 0; i < numnondigits; ++i)
3725 *b1++ = *buf++;
3726 for (i = 0; i < prec - numdigits; i++)
3727 *b1++ = '0';
3728 for (i = 0; i < numdigits; i++)
3729 *b1++ = *buf++;
3730 *b1 = '\0';
3731 Py_DECREF(result);
3732 result = r1;
3733 buf = PyString_AS_STRING(result);
3734 len = numnondigits + prec;
3735 }
3736
3737 /* Fix up case for hex conversions. */
3738 switch (type) {
3739 case 'x':
3740 /* Need to convert all upper case letters to lower case. */
3741 for (i = 0; i < len; i++)
3742 if (buf[i] >= 'A' && buf[i] <= 'F')
3743 buf[i] += 'a'-'A';
3744 break;
3745 case 'X':
3746 /* Need to convert 0x to 0X (and -0x to -0X). */
3747 if (buf[sign + 1] == 'x')
3748 buf[sign + 1] = 'X';
3749 break;
3750 }
3751 *pbuf = buf;
3752 *plen = len;
3753 return result;
3754}
3755
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003756static int
Fred Drakeba096332000-07-09 07:04:36 +00003757formatint(char *buf, size_t buflen, int flags,
3758 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003759{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003760 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003761 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3762 + 1 + 1 = 24 */
3763 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003764 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003765 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003766
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003767 x = PyInt_AsLong(v);
3768 if (x == -1 && PyErr_Occurred()) {
3769 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003770 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003771 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003772 if (x < 0 && type == 'u') {
3773 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003774 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003775 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3776 sign = "-";
3777 else
3778 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003779 if (prec < 0)
3780 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003781
3782 if ((flags & F_ALT) &&
3783 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003784 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003785 * of issues that cause pain:
3786 * - when 0 is being converted, the C standard leaves off
3787 * the '0x' or '0X', which is inconsistent with other
3788 * %#x/%#X conversions and inconsistent with Python's
3789 * hex() function
3790 * - there are platforms that violate the standard and
3791 * convert 0 with the '0x' or '0X'
3792 * (Metrowerks, Compaq Tru64)
3793 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003794 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003795 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003796 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003797 * We can achieve the desired consistency by inserting our
3798 * own '0x' or '0X' prefix, and substituting %x/%X in place
3799 * of %#x/%#X.
3800 *
3801 * Note that this is the same approach as used in
3802 * formatint() in unicodeobject.c
3803 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003804 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3805 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003806 }
3807 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003808 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3809 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003810 prec, type);
3811 }
3812
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003813 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3814 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003815 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003816 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003817 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003818 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003819 return -1;
3820 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003821 if (sign[0])
3822 PyOS_snprintf(buf, buflen, fmt, -x);
3823 else
3824 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003825 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003826}
3827
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003828static int
Fred Drakeba096332000-07-09 07:04:36 +00003829formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003830{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003831 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003832 if (PyString_Check(v)) {
3833 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003834 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003835 }
3836 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003837 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003838 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003839 }
3840 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003841 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003842}
3843
Guido van Rossum013142a1994-08-30 08:19:36 +00003844
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand in any expression (for example `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003854
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003855PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003856PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003857{
3858 char *fmt, *res;
3859 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003860 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003861 PyObject *result, *orig_args;
3862#ifdef Py_USING_UNICODE
3863 PyObject *v, *w;
3864#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003865 PyObject *dict = NULL;
3866 if (format == NULL || !PyString_Check(format) || args == NULL) {
3867 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003868 return NULL;
3869 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003870 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003871 fmt = PyString_AS_STRING(format);
3872 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003873 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003874 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003875 if (result == NULL)
3876 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003877 res = PyString_AsString(result);
3878 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003879 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003880 argidx = 0;
3881 }
3882 else {
3883 arglen = -1;
3884 argidx = -2;
3885 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003886 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3887 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003888 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003889 while (--fmtcnt >= 0) {
3890 if (*fmt != '%') {
3891 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003892 rescnt = fmtcnt + 100;
3893 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003894 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003895 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003896 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003897 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003898 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003899 }
3900 *res++ = *fmt++;
3901 }
3902 else {
3903 /* Got a format specifier */
3904 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003905 int width = -1;
3906 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003907 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003908 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003909 PyObject *v = NULL;
3910 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003911 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003912 int sign;
3913 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003914 char formatbuf[FORMATBUFLEN];
3915 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003916#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003917 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003918 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003919#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003920
Guido van Rossumda9c2711996-12-05 21:58:58 +00003921 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003922 if (*fmt == '(') {
3923 char *keystart;
3924 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003925 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003926 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003927
3928 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003929 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003930 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003931 goto error;
3932 }
3933 ++fmt;
3934 --fmtcnt;
3935 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003936 /* Skip over balanced parentheses */
3937 while (pcount > 0 && --fmtcnt >= 0) {
3938 if (*fmt == ')')
3939 --pcount;
3940 else if (*fmt == '(')
3941 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003942 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003943 }
3944 keylen = fmt - keystart - 1;
3945 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003946 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003947 "incomplete format key");
3948 goto error;
3949 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003950 key = PyString_FromStringAndSize(keystart,
3951 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003952 if (key == NULL)
3953 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003954 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003955 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003956 args_owned = 0;
3957 }
3958 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003959 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003960 if (args == NULL) {
3961 goto error;
3962 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003963 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003964 arglen = -1;
3965 argidx = -2;
3966 }
Guido van Rossume5372401993-03-16 12:15:04 +00003967 while (--fmtcnt >= 0) {
3968 switch (c = *fmt++) {
3969 case '-': flags |= F_LJUST; continue;
3970 case '+': flags |= F_SIGN; continue;
3971 case ' ': flags |= F_BLANK; continue;
3972 case '#': flags |= F_ALT; continue;
3973 case '0': flags |= F_ZERO; continue;
3974 }
3975 break;
3976 }
3977 if (c == '*') {
3978 v = getnextarg(args, arglen, &argidx);
3979 if (v == NULL)
3980 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003981 if (!PyInt_Check(v)) {
3982 PyErr_SetString(PyExc_TypeError,
3983 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003984 goto error;
3985 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003986 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003987 if (width < 0) {
3988 flags |= F_LJUST;
3989 width = -width;
3990 }
Guido van Rossume5372401993-03-16 12:15:04 +00003991 if (--fmtcnt >= 0)
3992 c = *fmt++;
3993 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003994 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003995 width = c - '0';
3996 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003997 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003998 if (!isdigit(c))
3999 break;
4000 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004001 PyErr_SetString(
4002 PyExc_ValueError,
4003 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004004 goto error;
4005 }
4006 width = width*10 + (c - '0');
4007 }
4008 }
4009 if (c == '.') {
4010 prec = 0;
4011 if (--fmtcnt >= 0)
4012 c = *fmt++;
4013 if (c == '*') {
4014 v = getnextarg(args, arglen, &argidx);
4015 if (v == NULL)
4016 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004017 if (!PyInt_Check(v)) {
4018 PyErr_SetString(
4019 PyExc_TypeError,
4020 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004021 goto error;
4022 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004023 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004024 if (prec < 0)
4025 prec = 0;
4026 if (--fmtcnt >= 0)
4027 c = *fmt++;
4028 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004029 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004030 prec = c - '0';
4031 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004032 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004033 if (!isdigit(c))
4034 break;
4035 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004036 PyErr_SetString(
4037 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004038 "prec too big");
4039 goto error;
4040 }
4041 prec = prec*10 + (c - '0');
4042 }
4043 }
4044 } /* prec */
4045 if (fmtcnt >= 0) {
4046 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004047 if (--fmtcnt >= 0)
4048 c = *fmt++;
4049 }
4050 }
4051 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004052 PyErr_SetString(PyExc_ValueError,
4053 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004054 goto error;
4055 }
4056 if (c != '%') {
4057 v = getnextarg(args, arglen, &argidx);
4058 if (v == NULL)
4059 goto error;
4060 }
4061 sign = 0;
4062 fill = ' ';
4063 switch (c) {
4064 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004065 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004066 len = 1;
4067 break;
4068 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004069#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004070 if (PyUnicode_Check(v)) {
4071 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004072 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004073 goto unicode;
4074 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004075#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004076 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004077 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004078 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004079 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004080 else
4081 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004082 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004083 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004084 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004085 /* XXX Note: this should never happen,
4086 since PyObject_Repr() and
4087 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004088 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004089 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004090 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004091 goto error;
4092 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004093 pbuf = PyString_AS_STRING(temp);
4094 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004095 if (prec >= 0 && len > prec)
4096 len = prec;
4097 break;
4098 case 'i':
4099 case 'd':
4100 case 'u':
4101 case 'o':
4102 case 'x':
4103 case 'X':
4104 if (c == 'i')
4105 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004106 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004107 temp = _PyString_FormatLong(v, flags,
4108 prec, c, &pbuf, &len);
4109 if (!temp)
4110 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004111 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004112 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004113 else {
4114 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004115 len = formatint(pbuf,
4116 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004117 flags, prec, c, v);
4118 if (len < 0)
4119 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004120 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004121 }
4122 if (flags & F_ZERO)
4123 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004124 break;
4125 case 'e':
4126 case 'E':
4127 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004128 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004129 case 'g':
4130 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004131 if (c == 'F')
4132 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004133 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004134 len = formatfloat(pbuf, sizeof(formatbuf),
4135 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004136 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004137 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004138 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004139 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004140 fill = '0';
4141 break;
4142 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004143#ifdef Py_USING_UNICODE
4144 if (PyUnicode_Check(v)) {
4145 fmt = fmt_start;
4146 argidx = argidx_start;
4147 goto unicode;
4148 }
4149#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004150 pbuf = formatbuf;
4151 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004152 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004153 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004154 break;
4155 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004156 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004157 "unsupported format character '%c' (0x%x) "
4158 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004159 c, c,
4160 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004161 goto error;
4162 }
4163 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004164 if (*pbuf == '-' || *pbuf == '+') {
4165 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004166 len--;
4167 }
4168 else if (flags & F_SIGN)
4169 sign = '+';
4170 else if (flags & F_BLANK)
4171 sign = ' ';
4172 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004173 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004174 }
4175 if (width < len)
4176 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004177 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004178 reslen -= rescnt;
4179 rescnt = width + fmtcnt + 100;
4180 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004181 if (reslen < 0) {
4182 Py_DECREF(result);
4183 return PyErr_NoMemory();
4184 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004185 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004186 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004187 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004188 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004189 }
4190 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004191 if (fill != ' ')
4192 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004193 rescnt--;
4194 if (width > len)
4195 width--;
4196 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004197 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4198 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004199 assert(pbuf[1] == c);
4200 if (fill != ' ') {
4201 *res++ = *pbuf++;
4202 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004203 }
Tim Petersfff53252001-04-12 18:38:48 +00004204 rescnt -= 2;
4205 width -= 2;
4206 if (width < 0)
4207 width = 0;
4208 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004209 }
4210 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004211 do {
4212 --rescnt;
4213 *res++ = fill;
4214 } while (--width > len);
4215 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004216 if (fill == ' ') {
4217 if (sign)
4218 *res++ = sign;
4219 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004220 (c == 'x' || c == 'X')) {
4221 assert(pbuf[0] == '0');
4222 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004223 *res++ = *pbuf++;
4224 *res++ = *pbuf++;
4225 }
4226 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004227 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004228 res += len;
4229 rescnt -= len;
4230 while (--width >= len) {
4231 --rescnt;
4232 *res++ = ' ';
4233 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004234 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004235 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004236 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004237 goto error;
4238 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004239 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004240 } /* '%' */
4241 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004242 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004243 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004244 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004245 goto error;
4246 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004247 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004248 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004249 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004250 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004251 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004252
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004253#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004254 unicode:
4255 if (args_owned) {
4256 Py_DECREF(args);
4257 args_owned = 0;
4258 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004259 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004260 if (PyTuple_Check(orig_args) && argidx > 0) {
4261 PyObject *v;
4262 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4263 v = PyTuple_New(n);
4264 if (v == NULL)
4265 goto error;
4266 while (--n >= 0) {
4267 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4268 Py_INCREF(w);
4269 PyTuple_SET_ITEM(v, n, w);
4270 }
4271 args = v;
4272 } else {
4273 Py_INCREF(orig_args);
4274 args = orig_args;
4275 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004276 args_owned = 1;
4277 /* Take what we have of the result and let the Unicode formatting
4278 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004279 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004280 if (_PyString_Resize(&result, rescnt))
4281 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004282 fmtcnt = PyString_GET_SIZE(format) - \
4283 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004284 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4285 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004286 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004287 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004288 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004289 if (v == NULL)
4290 goto error;
4291 /* Paste what we have (result) to what the Unicode formatting
4292 function returned (v) and return the result (or error) */
4293 w = PyUnicode_Concat(result, v);
4294 Py_DECREF(result);
4295 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004296 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004297 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004298#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004299
Guido van Rossume5372401993-03-16 12:15:04 +00004300 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004301 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004302 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004303 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004304 }
Guido van Rossume5372401993-03-16 12:15:04 +00004305 return NULL;
4306}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004307
Guido van Rossum2a61e741997-01-18 07:55:05 +00004308void
Fred Drakeba096332000-07-09 07:04:36 +00004309PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004310{
4311 register PyStringObject *s = (PyStringObject *)(*p);
4312 PyObject *t;
4313 if (s == NULL || !PyString_Check(s))
4314 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004315 /* If it's a string subclass, we don't really know what putting
4316 it in the interned dict might do. */
4317 if (!PyString_CheckExact(s))
4318 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004319 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004320 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004321 if (interned == NULL) {
4322 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004323 if (interned == NULL) {
4324 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004325 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004326 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004327 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004328 t = PyDict_GetItem(interned, (PyObject *)s);
4329 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004330 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004331 Py_DECREF(*p);
4332 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004333 return;
4334 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004335
Armin Rigo79f7ad22004-08-07 19:27:39 +00004336 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004337 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004338 return;
4339 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004340 /* The two references in interned are not counted by refcnt.
4341 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004342 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004343 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004344}
4345
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004346void
4347PyString_InternImmortal(PyObject **p)
4348{
4349 PyString_InternInPlace(p);
4350 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4351 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4352 Py_INCREF(*p);
4353 }
4354}
4355
Guido van Rossum2a61e741997-01-18 07:55:05 +00004356
4357PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004358PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004359{
4360 PyObject *s = PyString_FromString(cp);
4361 if (s == NULL)
4362 return NULL;
4363 PyString_InternInPlace(&s);
4364 return s;
4365}
4366
Guido van Rossum8cf04761997-08-02 02:57:45 +00004367void
Fred Drakeba096332000-07-09 07:04:36 +00004368PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004369{
4370 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004371 for (i = 0; i < UCHAR_MAX + 1; i++) {
4372 Py_XDECREF(characters[i]);
4373 characters[i] = NULL;
4374 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004375 Py_XDECREF(nullstring);
4376 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004377}
Barry Warsawa903ad982001-02-23 16:40:48 +00004378
Barry Warsawa903ad982001-02-23 16:40:48 +00004379void _Py_ReleaseInternedStrings(void)
4380{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004381 PyObject *keys;
4382 PyStringObject *s;
4383 int i, n;
4384
4385 if (interned == NULL || !PyDict_Check(interned))
4386 return;
4387 keys = PyDict_Keys(interned);
4388 if (keys == NULL || !PyList_Check(keys)) {
4389 PyErr_Clear();
4390 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004391 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004392
4393 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4394 detector, interned strings are not forcibly deallocated; rather, we
4395 give them their stolen references back, and then clear and DECREF
4396 the interned dict. */
4397
4398 fprintf(stderr, "releasing interned strings\n");
4399 n = PyList_GET_SIZE(keys);
4400 for (i = 0; i < n; i++) {
4401 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4402 switch (s->ob_sstate) {
4403 case SSTATE_NOT_INTERNED:
4404 /* XXX Shouldn't happen */
4405 break;
4406 case SSTATE_INTERNED_IMMORTAL:
4407 s->ob_refcnt += 1;
4408 break;
4409 case SSTATE_INTERNED_MORTAL:
4410 s->ob_refcnt += 2;
4411 break;
4412 default:
4413 Py_FatalError("Inconsistent interned string state.");
4414 }
4415 s->ob_sstate = SSTATE_NOT_INTERNED;
4416 }
4417 Py_DECREF(keys);
4418 PyDict_Clear(interned);
4419 Py_DECREF(interned);
4420 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004421}