blob: 78560de9d6bcf4ea2557cc37ac57cca66c64945a [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000157 int n = 0;
158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d94e2002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d94e2002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000184
Barry Warsawdadace02001-08-24 18:32:06 +0000185 switch (*f) {
186 case 'c':
187 (void)va_arg(count, int);
188 /* fall through... */
189 case '%':
190 n++;
191 break;
192 case 'd': case 'i': case 'x':
193 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000194 /* 20 bytes is enough to hold a 64-bit
195 integer. Decimal takes the most space.
196 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000197 n += 20;
198 break;
199 case 's':
200 s = va_arg(count, char*);
201 n += strlen(s);
202 break;
203 case 'p':
204 (void) va_arg(count, int);
205 /* maximum 64-bit pointer representation:
206 * 0xffffffffffffffff
207 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000208 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000209 */
210 n += 19;
211 break;
212 default:
213 /* if we stumble upon an unknown
214 formatting code, copy the rest of
215 the format string to the output
216 string. (we cannot just skip the
217 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000218 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 n += strlen(p);
220 goto expand;
221 }
222 } else
223 n++;
224 }
225 expand:
226 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000227 /* Since we've analyzed how much space we need for the worst case,
228 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000229 string = PyString_FromStringAndSize(NULL, n);
230 if (!string)
231 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000232
Barry Warsawdadace02001-08-24 18:32:06 +0000233 s = PyString_AsString(string);
234
235 for (f = format; *f; f++) {
236 if (*f == '%') {
237 const char* p = f++;
238 int i, longflag = 0;
239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
242 while (isdigit(Py_CHARMASK(*f)))
243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 }
250 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
251 f++;
252 /* handle the long flag, but only for %ld. others
253 can be added when necessary. */
254 if (*f == 'l' && *(f+1) == 'd') {
255 longflag = 1;
256 ++f;
257 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000258
Barry Warsawdadace02001-08-24 18:32:06 +0000259 switch (*f) {
260 case 'c':
261 *s++ = va_arg(vargs, int);
262 break;
263 case 'd':
264 if (longflag)
265 sprintf(s, "%ld", va_arg(vargs, long));
266 else
267 sprintf(s, "%d", va_arg(vargs, int));
268 s += strlen(s);
269 break;
270 case 'i':
271 sprintf(s, "%i", va_arg(vargs, int));
272 s += strlen(s);
273 break;
274 case 'x':
275 sprintf(s, "%x", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 's':
279 p = va_arg(vargs, char*);
280 i = strlen(p);
281 if (n > 0 && i > n)
282 i = n;
283 memcpy(s, p, i);
284 s += i;
285 break;
286 case 'p':
287 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000288 /* %p is ill-defined: ensure leading 0x. */
289 if (s[1] == 'X')
290 s[1] = 'x';
291 else if (s[1] != 'x') {
292 memmove(s+2, s, strlen(s)+1);
293 s[0] = '0';
294 s[1] = 'x';
295 }
Barry Warsawdadace02001-08-24 18:32:06 +0000296 s += strlen(s);
297 break;
298 case '%':
299 *s++ = '%';
300 break;
301 default:
302 strcpy(s, p);
303 s += strlen(s);
304 goto end;
305 }
306 } else
307 *s++ = *f;
308 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000309
Barry Warsawdadace02001-08-24 18:32:06 +0000310 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000311 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000312 return string;
313}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000316PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000317{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000318 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319 va_list vargs;
320
321#ifdef HAVE_STDARG_PROTOTYPES
322 va_start(vargs, format);
323#else
324 va_start(vargs);
325#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000326 ret = PyString_FromFormatV(format, vargs);
327 va_end(vargs);
328 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000329}
330
331
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000332PyObject *PyString_Decode(const char *s,
333 int size,
334 const char *encoding,
335 const char *errors)
336{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000337 PyObject *v, *str;
338
339 str = PyString_FromStringAndSize(s, size);
340 if (str == NULL)
341 return NULL;
342 v = PyString_AsDecodedString(str, encoding, errors);
343 Py_DECREF(str);
344 return v;
345}
346
347PyObject *PyString_AsDecodedObject(PyObject *str,
348 const char *encoding,
349 const char *errors)
350{
351 PyObject *v;
352
353 if (!PyString_Check(str)) {
354 PyErr_BadArgument();
355 goto onError;
356 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000358 if (encoding == NULL) {
359#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000361#else
362 PyErr_SetString(PyExc_ValueError, "no encoding specified");
363 goto onError;
364#endif
365 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000366
367 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000368 v = PyCodec_Decode(str, encoding, errors);
369 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000370 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000371
372 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000373
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000374 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000375 return NULL;
376}
377
378PyObject *PyString_AsDecodedString(PyObject *str,
379 const char *encoding,
380 const char *errors)
381{
382 PyObject *v;
383
384 v = PyString_AsDecodedObject(str, encoding, errors);
385 if (v == NULL)
386 goto onError;
387
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000388#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389 /* Convert Unicode to a string using the default encoding */
390 if (PyUnicode_Check(v)) {
391 PyObject *temp = v;
392 v = PyUnicode_AsEncodedString(v, NULL, NULL);
393 Py_DECREF(temp);
394 if (v == NULL)
395 goto onError;
396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000397#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 if (!PyString_Check(v)) {
399 PyErr_Format(PyExc_TypeError,
400 "decoder did not return a string object (type=%.400s)",
401 v->ob_type->tp_name);
402 Py_DECREF(v);
403 goto onError;
404 }
405
406 return v;
407
408 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 return NULL;
410}
411
412PyObject *PyString_Encode(const char *s,
413 int size,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000418
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000419 str = PyString_FromStringAndSize(s, size);
420 if (str == NULL)
421 return NULL;
422 v = PyString_AsEncodedString(str, encoding, errors);
423 Py_DECREF(str);
424 return v;
425}
426
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000428 const char *encoding,
429 const char *errors)
430{
431 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000432
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 if (!PyString_Check(str)) {
434 PyErr_BadArgument();
435 goto onError;
436 }
437
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000438 if (encoding == NULL) {
439#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000440 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000441#else
442 PyErr_SetString(PyExc_ValueError, "no encoding specified");
443 goto onError;
444#endif
445 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446
447 /* Encode via the codec registry */
448 v = PyCodec_Encode(str, encoding, errors);
449 if (v == NULL)
450 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451
452 return v;
453
454 onError:
455 return NULL;
456}
457
458PyObject *PyString_AsEncodedString(PyObject *str,
459 const char *encoding,
460 const char *errors)
461{
462 PyObject *v;
463
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000464 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000465 if (v == NULL)
466 goto onError;
467
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000468#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469 /* Convert Unicode to a string using the default encoding */
470 if (PyUnicode_Check(v)) {
471 PyObject *temp = v;
472 v = PyUnicode_AsEncodedString(v, NULL, NULL);
473 Py_DECREF(temp);
474 if (v == NULL)
475 goto onError;
476 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000477#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 if (!PyString_Check(v)) {
479 PyErr_Format(PyExc_TypeError,
480 "encoder did not return a string object (type=%.400s)",
481 v->ob_type->tp_name);
482 Py_DECREF(v);
483 goto onError;
484 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000485
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000486 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000487
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000488 onError:
489 return NULL;
490}
491
Guido van Rossum234f9421993-06-17 12:35:49 +0000492static void
Fred Drakeba096332000-07-09 07:04:36 +0000493string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000494{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000495 switch (PyString_CHECK_INTERNED(op)) {
496 case SSTATE_NOT_INTERNED:
497 break;
498
499 case SSTATE_INTERNED_MORTAL:
500 /* revive dead object temporarily for DelItem */
501 op->ob_refcnt = 3;
502 if (PyDict_DelItem(interned, op) != 0)
503 Py_FatalError(
504 "deletion of interned string failed");
505 break;
506
507 case SSTATE_INTERNED_IMMORTAL:
508 Py_FatalError("Immortal interned string died.");
509
510 default:
511 Py_FatalError("Inconsistent interned string state.");
512 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000513 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000514}
515
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000516/* Unescape a backslash-escaped string. If unicode is non-zero,
517 the string is a u-literal. If recode_encoding is non-zero,
518 the string is UTF-8 encoded and should be re-encoded in the
519 specified encoding. */
520
521PyObject *PyString_DecodeEscape(const char *s,
522 int len,
523 const char *errors,
524 int unicode,
525 const char *recode_encoding)
526{
527 int c;
528 char *p, *buf;
529 const char *end;
530 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000531 int newlen = recode_encoding ? 4*len:len;
532 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000533 if (v == NULL)
534 return NULL;
535 p = buf = PyString_AsString(v);
536 end = s + len;
537 while (s < end) {
538 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000539 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540#ifdef Py_USING_UNICODE
541 if (recode_encoding && (*s & 0x80)) {
542 PyObject *u, *w;
543 char *r;
544 const char* t;
545 int rn;
546 t = s;
547 /* Decode non-ASCII bytes as UTF-8. */
548 while (t < end && (*t & 0x80)) t++;
549 u = PyUnicode_DecodeUTF8(s, t - s, errors);
550 if(!u) goto failed;
551
552 /* Recode them in target encoding. */
553 w = PyUnicode_AsEncodedString(
554 u, recode_encoding, errors);
555 Py_DECREF(u);
556 if (!w) goto failed;
557
558 /* Append bytes to output buffer. */
559 r = PyString_AsString(w);
560 rn = PyString_Size(w);
561 memcpy(p, r, rn);
562 p += rn;
563 Py_DECREF(w);
564 s = t;
565 } else {
566 *p++ = *s++;
567 }
568#else
569 *p++ = *s++;
570#endif
571 continue;
572 }
573 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000574 if (s==end) {
575 PyErr_SetString(PyExc_ValueError,
576 "Trailing \\ in string");
577 goto failed;
578 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000579 switch (*s++) {
580 /* XXX This assumes ASCII! */
581 case '\n': break;
582 case '\\': *p++ = '\\'; break;
583 case '\'': *p++ = '\''; break;
584 case '\"': *p++ = '\"'; break;
585 case 'b': *p++ = '\b'; break;
586 case 'f': *p++ = '\014'; break; /* FF */
587 case 't': *p++ = '\t'; break;
588 case 'n': *p++ = '\n'; break;
589 case 'r': *p++ = '\r'; break;
590 case 'v': *p++ = '\013'; break; /* VT */
591 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
592 case '0': case '1': case '2': case '3':
593 case '4': case '5': case '6': case '7':
594 c = s[-1] - '0';
595 if ('0' <= *s && *s <= '7') {
596 c = (c<<3) + *s++ - '0';
597 if ('0' <= *s && *s <= '7')
598 c = (c<<3) + *s++ - '0';
599 }
600 *p++ = c;
601 break;
602 case 'x':
603 if (isxdigit(Py_CHARMASK(s[0]))
604 && isxdigit(Py_CHARMASK(s[1]))) {
605 unsigned int x = 0;
606 c = Py_CHARMASK(*s);
607 s++;
608 if (isdigit(c))
609 x = c - '0';
610 else if (islower(c))
611 x = 10 + c - 'a';
612 else
613 x = 10 + c - 'A';
614 x = x << 4;
615 c = Py_CHARMASK(*s);
616 s++;
617 if (isdigit(c))
618 x += c - '0';
619 else if (islower(c))
620 x += 10 + c - 'a';
621 else
622 x += 10 + c - 'A';
623 *p++ = x;
624 break;
625 }
626 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000627 PyErr_SetString(PyExc_ValueError,
628 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000629 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000630 }
631 if (strcmp(errors, "replace") == 0) {
632 *p++ = '?';
633 } else if (strcmp(errors, "ignore") == 0)
634 /* do nothing */;
635 else {
636 PyErr_Format(PyExc_ValueError,
637 "decoding error; "
638 "unknown error handling code: %.400s",
639 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000640 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000641 }
642#ifndef Py_USING_UNICODE
643 case 'u':
644 case 'U':
645 case 'N':
646 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000647 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 "Unicode escapes not legal "
649 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000650 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000651 }
652#endif
653 default:
654 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000655 s--;
656 goto non_esc; /* an arbitry number of unescaped
657 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000658 }
659 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000660 if (p-buf < newlen)
661 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 return v;
663 failed:
664 Py_DECREF(v);
665 return NULL;
666}
667
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000668static int
669string_getsize(register PyObject *op)
670{
671 char *s;
672 int len;
673 if (PyString_AsStringAndSize(op, &s, &len))
674 return -1;
675 return len;
676}
677
678static /*const*/ char *
679string_getbuffer(register PyObject *op)
680{
681 char *s;
682 int len;
683 if (PyString_AsStringAndSize(op, &s, &len))
684 return NULL;
685 return s;
686}
687
Guido van Rossumd7047b31995-01-02 19:07:15 +0000688int
Fred Drakeba096332000-07-09 07:04:36 +0000689PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000690{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000691 if (!PyString_Check(op))
692 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000694}
695
696/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000697PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000699 if (!PyString_Check(op))
700 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702}
703
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704int
705PyString_AsStringAndSize(register PyObject *obj,
706 register char **s,
707 register int *len)
708{
709 if (s == NULL) {
710 PyErr_BadInternalCall();
711 return -1;
712 }
713
714 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000715#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (PyUnicode_Check(obj)) {
717 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
718 if (obj == NULL)
719 return -1;
720 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000721 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000722#endif
723 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 PyErr_Format(PyExc_TypeError,
725 "expected string or Unicode object, "
726 "%.200s found", obj->ob_type->tp_name);
727 return -1;
728 }
729 }
730
731 *s = PyString_AS_STRING(obj);
732 if (len != NULL)
733 *len = PyString_GET_SIZE(obj);
734 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
735 PyErr_SetString(PyExc_TypeError,
736 "expected string without null bytes");
737 return -1;
738 }
739 return 0;
740}
741
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742/* Methods */
743
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000744static int
Fred Drakeba096332000-07-09 07:04:36 +0000745string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746{
747 int i;
748 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000749 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000750
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000751 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000752 if (! PyString_CheckExact(op)) {
753 int ret;
754 /* A str subclass may have its own __str__ method. */
755 op = (PyStringObject *) PyObject_Str((PyObject *)op);
756 if (op == NULL)
757 return -1;
758 ret = string_print(op, fp, flags);
759 Py_DECREF(op);
760 return ret;
761 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000762 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000763#ifdef __VMS
764 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
765#else
766 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
767#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000768 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000770
Thomas Wouters7e474022000-07-16 12:04:32 +0000771 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000772 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000773 if (memchr(op->ob_sval, '\'', op->ob_size) &&
774 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775 quote = '"';
776
777 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000778 for (i = 0; i < op->ob_size; i++) {
779 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000781 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000782 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000783 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000784 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000785 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000786 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000787 fprintf(fp, "\\r");
788 else if (c < ' ' || c >= 0x7f)
789 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000790 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000791 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000794 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000795}
796
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000797PyObject *
798PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000800 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000801 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000802 PyObject *v;
803 if (newsize > INT_MAX) {
804 PyErr_SetString(PyExc_OverflowError,
805 "string is too large to make repr");
806 }
807 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000809 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000810 }
811 else {
812 register int i;
813 register char c;
814 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815 int quote;
816
Thomas Wouters7e474022000-07-16 12:04:32 +0000817 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000818 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000819 if (smartquotes &&
820 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000821 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000822 quote = '"';
823
Tim Peters9161c8b2001-12-03 01:55:38 +0000824 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000825 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000827 /* There's at least enough room for a hex escape
828 and a closing quote. */
829 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000833 else if (c == '\t')
834 *p++ = '\\', *p++ = 't';
835 else if (c == '\n')
836 *p++ = '\\', *p++ = 'n';
837 else if (c == '\r')
838 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 else if (c < ' ' || c >= 0x7f) {
840 /* For performance, we don't want to call
841 PyOS_snprintf here (extra layers of
842 function call). */
843 sprintf(p, "\\x%02x", c & 0xff);
844 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000845 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 else
847 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000849 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000850 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000852 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000853 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000854 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856}
857
Guido van Rossum189f1df2001-05-01 16:51:53 +0000858static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000859string_repr(PyObject *op)
860{
861 return PyString_Repr(op, 1);
862}
863
864static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000865string_str(PyObject *s)
866{
Tim Petersc9933152001-10-16 20:18:24 +0000867 assert(PyString_Check(s));
868 if (PyString_CheckExact(s)) {
869 Py_INCREF(s);
870 return s;
871 }
872 else {
873 /* Subtype -- return genuine string with the same value. */
874 PyStringObject *t = (PyStringObject *) s;
875 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
876 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000877}
878
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879static int
Fred Drakeba096332000-07-09 07:04:36 +0000880string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881{
882 return a->ob_size;
883}
884
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000885static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000886string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887{
888 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000889 register PyStringObject *op;
890 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000891#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000892 if (PyUnicode_Check(bb))
893 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000894#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000895 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000896 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000897 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898 return NULL;
899 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000900#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000902 if ((a->ob_size == 0 || b->ob_size == 0) &&
903 PyString_CheckExact(a) && PyString_CheckExact(b)) {
904 if (a->ob_size == 0) {
905 Py_INCREF(bb);
906 return bb;
907 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 Py_INCREF(a);
909 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000910 }
911 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000912 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000913 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000914 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000915 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000916 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000917 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000918 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000919 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
920 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
921 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923#undef b
924}
925
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000926static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000927string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000928{
929 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000930 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000931 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000933 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934 if (n < 0)
935 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000936 /* watch out for overflows: the size can overflow int,
937 * and the # of bytes needed can overflow size_t
938 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000940 if (n && size / n != a->ob_size) {
941 PyErr_SetString(PyExc_OverflowError,
942 "repeated string is too long");
943 return NULL;
944 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000945 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 Py_INCREF(a);
947 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 }
Tim Peterse7c05322004-06-27 17:24:49 +0000949 nbytes = (size_t)size;
950 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000951 PyErr_SetString(PyExc_OverflowError,
952 "repeated string is too long");
953 return NULL;
954 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000956 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000957 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000959 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000960 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000961 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000962 op->ob_sval[size] = '\0';
963 if (a->ob_size == 1 && n > 0) {
964 memset(op->ob_sval, a->ob_sval[0] , n);
965 return (PyObject *) op;
966 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000967 i = 0;
968 if (i < size) {
969 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
970 i = (int) a->ob_size;
971 }
972 while (i < size) {
973 j = (i <= size-i) ? i : size-i;
974 memcpy(op->ob_sval+i, op->ob_sval, j);
975 i += j;
976 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978}
979
980/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
981
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000983string_slice(register PyStringObject *a, register int i, register int j)
984 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985{
986 if (i < 0)
987 i = 0;
988 if (j < 0)
989 j = 0; /* Avoid signed/unsigned bug in next line */
990 if (j > a->ob_size)
991 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000992 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
993 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 Py_INCREF(a);
995 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 }
997 if (j < i)
998 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001000}
1001
Guido van Rossum9284a572000-03-07 15:53:43 +00001002static int
Fred Drakeba096332000-07-09 07:04:36 +00001003string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001004{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001005 char *s = PyString_AS_STRING(a);
1006 const char *sub = PyString_AS_STRING(el);
1007 char *last;
1008 int len_sub = PyString_GET_SIZE(el);
1009 int shortsub;
1010 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001011
1012 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001013#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014 if (PyUnicode_Check(el))
1015 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001016#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 if (!PyString_Check(el)) {
1018 PyErr_SetString(PyExc_TypeError,
1019 "'in <string>' requires string as left operand");
1020 return -1;
1021 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001022 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001023
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001024 if (len_sub == 0)
1025 return 1;
1026 /* last points to one char beyond the start of the rightmost
1027 substring. When s<last, there is still room for a possible match
1028 and s[0] through s[len_sub-1] will be in bounds.
1029 shortsub is len_sub minus the last character which is checked
1030 separately just before the memcmp(). That check helps prevent
1031 false starts and saves the setup time for memcmp().
1032 */
1033 firstchar = sub[0];
1034 shortsub = len_sub - 1;
1035 lastchar = sub[shortsub];
1036 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1037 while (s < last) {
1038 s = memchr(s, firstchar, last-s);
1039 if (s == NULL)
1040 return 0;
1041 assert(s < last);
1042 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001043 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001044 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001045 }
1046 return 0;
1047}
1048
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001050string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001052 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001053 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001054 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001056 return NULL;
1057 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001058 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001059 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001060 if (v == NULL)
1061 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001062 else {
1063#ifdef COUNT_ALLOCS
1064 one_strings++;
1065#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001066 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001067 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001068 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001069}
1070
Martin v. Löwiscd353062001-05-24 16:56:35 +00001071static PyObject*
1072string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074 int c;
1075 int len_a, len_b;
1076 int min_len;
1077 PyObject *result;
1078
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001079 /* Make sure both arguments are strings. */
1080 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001081 result = Py_NotImplemented;
1082 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001083 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001084 if (a == b) {
1085 switch (op) {
1086 case Py_EQ:case Py_LE:case Py_GE:
1087 result = Py_True;
1088 goto out;
1089 case Py_NE:case Py_LT:case Py_GT:
1090 result = Py_False;
1091 goto out;
1092 }
1093 }
1094 if (op == Py_EQ) {
1095 /* Supporting Py_NE here as well does not save
1096 much time, since Py_NE is rarely used. */
1097 if (a->ob_size == b->ob_size
1098 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001099 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 a->ob_size) == 0)) {
1101 result = Py_True;
1102 } else {
1103 result = Py_False;
1104 }
1105 goto out;
1106 }
1107 len_a = a->ob_size; len_b = b->ob_size;
1108 min_len = (len_a < len_b) ? len_a : len_b;
1109 if (min_len > 0) {
1110 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1111 if (c==0)
1112 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1113 }else
1114 c = 0;
1115 if (c == 0)
1116 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1117 switch (op) {
1118 case Py_LT: c = c < 0; break;
1119 case Py_LE: c = c <= 0; break;
1120 case Py_EQ: assert(0); break; /* unreachable */
1121 case Py_NE: c = c != 0; break;
1122 case Py_GT: c = c > 0; break;
1123 case Py_GE: c = c >= 0; break;
1124 default:
1125 result = Py_NotImplemented;
1126 goto out;
1127 }
1128 result = c ? Py_True : Py_False;
1129 out:
1130 Py_INCREF(result);
1131 return result;
1132}
1133
1134int
1135_PyString_Eq(PyObject *o1, PyObject *o2)
1136{
1137 PyStringObject *a, *b;
1138 a = (PyStringObject*)o1;
1139 b = (PyStringObject*)o2;
1140 return a->ob_size == b->ob_size
1141 && *a->ob_sval == *b->ob_sval
1142 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001143}
1144
Guido van Rossum9bfef441993-03-29 10:43:31 +00001145static long
Fred Drakeba096332000-07-09 07:04:36 +00001146string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001147{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 register int len;
1149 register unsigned char *p;
1150 register long x;
1151
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001152 if (a->ob_shash != -1)
1153 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001154 len = a->ob_size;
1155 p = (unsigned char *) a->ob_sval;
1156 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001157 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001158 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001159 x ^= a->ob_size;
1160 if (x == -1)
1161 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001162 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001163 return x;
1164}
1165
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001166static PyObject*
1167string_subscript(PyStringObject* self, PyObject* item)
1168{
1169 if (PyInt_Check(item)) {
1170 long i = PyInt_AS_LONG(item);
1171 if (i < 0)
1172 i += PyString_GET_SIZE(self);
1173 return string_item(self,i);
1174 }
1175 else if (PyLong_Check(item)) {
1176 long i = PyLong_AsLong(item);
1177 if (i == -1 && PyErr_Occurred())
1178 return NULL;
1179 if (i < 0)
1180 i += PyString_GET_SIZE(self);
1181 return string_item(self,i);
1182 }
1183 else if (PySlice_Check(item)) {
1184 int start, stop, step, slicelength, cur, i;
1185 char* source_buf;
1186 char* result_buf;
1187 PyObject* result;
1188
1189 if (PySlice_GetIndicesEx((PySliceObject*)item,
1190 PyString_GET_SIZE(self),
1191 &start, &stop, &step, &slicelength) < 0) {
1192 return NULL;
1193 }
1194
1195 if (slicelength <= 0) {
1196 return PyString_FromStringAndSize("", 0);
1197 }
1198 else {
1199 source_buf = PyString_AsString((PyObject*)self);
1200 result_buf = PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001201 if (result_buf == NULL)
1202 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001203
1204 for (cur = start, i = 0; i < slicelength;
1205 cur += step, i++) {
1206 result_buf[i] = source_buf[cur];
1207 }
1208
1209 result = PyString_FromStringAndSize(result_buf,
1210 slicelength);
1211 PyMem_Free(result_buf);
1212 return result;
1213 }
1214 }
1215 else {
1216 PyErr_SetString(PyExc_TypeError,
1217 "string indices must be integers");
1218 return NULL;
1219 }
1220}
1221
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001222static int
Fred Drakeba096332000-07-09 07:04:36 +00001223string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001224{
1225 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001226 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001227 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228 return -1;
1229 }
1230 *ptr = (void *)self->ob_sval;
1231 return self->ob_size;
1232}
1233
1234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236{
Guido van Rossum045e6881997-09-08 18:30:11 +00001237 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001238 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001239 return -1;
1240}
1241
1242static int
Fred Drakeba096332000-07-09 07:04:36 +00001243string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001244{
1245 if ( lenp )
1246 *lenp = self->ob_size;
1247 return 1;
1248}
1249
Guido van Rossum1db70701998-10-08 02:18:52 +00001250static int
Fred Drakeba096332000-07-09 07:04:36 +00001251string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001252{
1253 if ( index != 0 ) {
1254 PyErr_SetString(PyExc_SystemError,
1255 "accessing non-existent string segment");
1256 return -1;
1257 }
1258 *ptr = self->ob_sval;
1259 return self->ob_size;
1260}
1261
/* Sequence method table for strings.  Each entry is filled with the
   function of this file named on that line, cast to the slot's function
   type; the trailing comment names the slot.  The two assignment slots
   are 0, i.e. no item or slice assignment function is provided. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001262static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001263 (inquiry)string_length, /*sq_length*/
1264 (binaryfunc)string_concat, /*sq_concat*/
1265 (intargfunc)string_repeat, /*sq_repeat*/
1266 (intargfunc)string_item, /*sq_item*/
1267 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001268 0, /*sq_ass_item*/
1269 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001270 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001271};
1272
/* Mapping method table for strings: a length function, a subscript
   function (string_subscript handles integer indices and slices), and
   a 0 third entry.  NOTE(review): the slots are presumably mp_length,
   mp_subscript and mp_ass_subscript in that order -- confirm against
   the PyMappingMethods declaration. */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001273static PyMappingMethods string_as_mapping = {
1274 (inquiry)string_length,
1275 (binaryfunc)string_subscript,
1276 0,
1277};
1278
/* Buffer method table for strings, filled with the four
   string_buffer_* accessors above (read buffer, write buffer, segment
   count, char buffer).  The write-buffer accessor always fails. */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001279static PyBufferProcs string_as_buffer = {
1280 (getreadbufferproc)string_buffer_getreadbuf,
1281 (getwritebufferproc)string_buffer_getwritebuf,
1282 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001283 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001284};
1285
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286
1287
/* Strip-direction codes; they double as indices into stripformat[]. */
1288#define LEFTSTRIP 0
1289#define RIGHTSTRIP 1
1290#define BOTHSTRIP 2
1291
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001292/* Arrays indexed by above */
/* Each entry is an argument format string of the form "|O:<name>". */
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001293static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
1294
/* Bare method name for strip code i: skips the 3-character "|O:"
   prefix of the corresponding format string. */
1295#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001296
/* Append the substring data[left:right] to the list being built.

   Relies on names from the enclosing function: `str` (PyObject *
   scratch variable), `list` (the result list) and the label `onError`,
   which is jumped to if creating the string or appending it fails.
   The reference held in `str` is released on both paths.

   The expansion is several statements ending in an if/else, not a
   single do { } while (0) statement, so invoke it only as a complete
   statement inside braces. */
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001297#define SPLIT_APPEND(data, left, right) \
1298 str = PyString_FromStringAndSize((data) + (left), \
1299 (right) - (left)); \
1300 if (str == NULL) \
1301 goto onError; \
1302 if (PyList_Append(list, str)) { \
1303 Py_DECREF(str); \
1304 goto onError; \
1305 } \
1306 else \
1307 Py_DECREF(str);
1308
/* Insert the substring data[left:right] at the front (index 0) of the
   list being built.

   Relies on names from the enclosing function: `str` (PyObject *
   scratch variable), `list` (the result list) and the label `onError`,
   which is jumped to if creating the string or inserting it fails.
   The reference held in `str` is released on both paths.

   The expansion is several statements ending in an if/else, not a
   single do { } while (0) statement, so invoke it only as a complete
   statement inside braces. */
1309#define SPLIT_INSERT(data, left, right) \
1310 str = PyString_FromStringAndSize((data) + (left), \
1311 (right) - (left)); \
1312 if (str == NULL) \
1313 goto onError; \
1314 if (PyList_Insert(list, 0, str)) { \
1315 Py_DECREF(str); \
1316 goto onError; \
1317 } \
1318 else \
1319 Py_DECREF(str);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320
1321static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001322split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 int i, j;
1325 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001326 PyObject *list = PyList_New(0);
1327
1328 if (list == NULL)
1329 return NULL;
1330
Guido van Rossum4c08d552000-03-10 22:55:18 +00001331 for (i = j = 0; i < len; ) {
1332 while (i < len && isspace(Py_CHARMASK(s[i])))
1333 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 while (i < len && !isspace(Py_CHARMASK(s[i])))
1336 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001338 if (maxsplit-- <= 0)
1339 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001340 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341 while (i < len && isspace(Py_CHARMASK(s[i])))
1342 i++;
1343 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344 }
1345 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001346 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001347 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001350 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001351 Py_DECREF(list);
1352 return NULL;
1353}
1354
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001355static PyObject *
1356split_char(const char *s, int len, char ch, int maxcount)
1357{
1358 register int i, j;
1359 PyObject *str;
1360 PyObject *list = PyList_New(0);
1361
1362 if (list == NULL)
1363 return NULL;
1364
1365 for (i = j = 0; i < len; ) {
1366 if (s[i] == ch) {
1367 if (maxcount-- <= 0)
1368 break;
1369 SPLIT_APPEND(s, j, i);
1370 i = j = i + 1;
1371 } else
1372 i++;
1373 }
1374 if (j <= len) {
1375 SPLIT_APPEND(s, j, len);
1376 }
1377 return list;
1378
1379 onError:
1380 Py_DECREF(list);
1381 return NULL;
1382}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383
/* Docstring text for the string split() method. */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001384PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385"S.split([sep [,maxsplit]]) -> list of strings\n\
1386\n\
1387Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001388delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001389splits are done. If sep is not specified or is None, any\n\
1390whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391
1392static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001393string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001394{
1395 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001396 int maxsplit = -1;
1397 const char *s = PyString_AS_STRING(self), *sub;
1398 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001402 if (maxsplit < 0)
1403 maxsplit = INT_MAX;
1404 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001406 if (PyString_Check(subobj)) {
1407 sub = PyString_AS_STRING(subobj);
1408 n = PyString_GET_SIZE(subobj);
1409 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001410#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001411 else if (PyUnicode_Check(subobj))
1412 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001413#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001414 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1415 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001416
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417 if (n == 0) {
1418 PyErr_SetString(PyExc_ValueError, "empty separator");
1419 return NULL;
1420 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421 else if (n == 1)
1422 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423
1424 list = PyList_New(0);
1425 if (list == NULL)
1426 return NULL;
1427
1428 i = j = 0;
1429 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001430 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001431 if (maxsplit-- <= 0)
1432 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001433 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1434 if (item == NULL)
1435 goto fail;
1436 err = PyList_Append(list, item);
1437 Py_DECREF(item);
1438 if (err < 0)
1439 goto fail;
1440 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441 }
1442 else
1443 i++;
1444 }
1445 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1446 if (item == NULL)
1447 goto fail;
1448 err = PyList_Append(list, item);
1449 Py_DECREF(item);
1450 if (err < 0)
1451 goto fail;
1452
1453 return list;
1454
1455 fail:
1456 Py_DECREF(list);
1457 return NULL;
1458}
1459
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001460static PyObject *
1461rsplit_whitespace(const char *s, int len, int maxsplit)
1462{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 int i, j;
1464 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001465 PyObject *list = PyList_New(0);
1466
1467 if (list == NULL)
1468 return NULL;
1469
1470 for (i = j = len - 1; i >= 0; ) {
1471 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1472 i--;
1473 j = i;
1474 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1475 i--;
1476 if (j > i) {
1477 if (maxsplit-- <= 0)
1478 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001479 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001480 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1481 i--;
1482 j = i;
1483 }
1484 }
1485 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001487 }
1488 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001489 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001490 Py_DECREF(list);
1491 return NULL;
1492}
1493
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001494static PyObject *
1495rsplit_char(const char *s, int len, char ch, int maxcount)
1496{
1497 register int i, j;
1498 PyObject *str;
1499 PyObject *list = PyList_New(0);
1500
1501 if (list == NULL)
1502 return NULL;
1503
1504 for (i = j = len - 1; i >= 0; ) {
1505 if (s[i] == ch) {
1506 if (maxcount-- <= 0)
1507 break;
1508 SPLIT_INSERT(s, i + 1, j + 1);
1509 j = i = i - 1;
1510 } else
1511 i--;
1512 }
1513 if (j >= -1) {
1514 SPLIT_INSERT(s, 0, j + 1);
1515 }
1516 return list;
1517
1518 onError:
1519 Py_DECREF(list);
1520 return NULL;
1521}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001522
/* Docstring text for the string rsplit() method. */
1523PyDoc_STRVAR(rsplit__doc__,
1524"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1525\n\
1526Return a list of the words in the string S, using sep as the\n\
1527delimiter string, starting at the end of the string and working\n\
1528to the front. If maxsplit is given, at most maxsplit splits are\n\
1529done. If sep is not specified or is None, any whitespace string\n\
1530is a separator.");
1531
1532static PyObject *
1533string_rsplit(PyStringObject *self, PyObject *args)
1534{
1535 int len = PyString_GET_SIZE(self), n, i, j, err;
1536 int maxsplit = -1;
1537 const char *s = PyString_AS_STRING(self), *sub;
1538 PyObject *list, *item, *subobj = Py_None;
1539
1540 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1541 return NULL;
1542 if (maxsplit < 0)
1543 maxsplit = INT_MAX;
1544 if (subobj == Py_None)
1545 return rsplit_whitespace(s, len, maxsplit);
1546 if (PyString_Check(subobj)) {
1547 sub = PyString_AS_STRING(subobj);
1548 n = PyString_GET_SIZE(subobj);
1549 }
1550#ifdef Py_USING_UNICODE
1551 else if (PyUnicode_Check(subobj))
1552 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1553#endif
1554 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1555 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001556
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001557 if (n == 0) {
1558 PyErr_SetString(PyExc_ValueError, "empty separator");
1559 return NULL;
1560 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001561 else if (n == 1)
1562 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001563
1564 list = PyList_New(0);
1565 if (list == NULL)
1566 return NULL;
1567
1568 j = len;
1569 i = j - n;
1570 while (i >= 0) {
1571 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1572 if (maxsplit-- <= 0)
1573 break;
1574 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1575 if (item == NULL)
1576 goto fail;
1577 err = PyList_Insert(list, 0, item);
1578 Py_DECREF(item);
1579 if (err < 0)
1580 goto fail;
1581 j = i;
1582 i -= n;
1583 }
1584 else
1585 i--;
1586 }
1587 item = PyString_FromStringAndSize(s, j);
1588 if (item == NULL)
1589 goto fail;
1590 err = PyList_Insert(list, 0, item);
1591 Py_DECREF(item);
1592 if (err < 0)
1593 goto fail;
1594
1595 return list;
1596
1597 fail:
1598 Py_DECREF(list);
1599 return NULL;
1600}
1601
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602
/* Docstring text for the string join() method. */
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001603PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604"S.join(sequence) -> string\n\
1605\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001607sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608
1609static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001610string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611{
1612 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001613 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 char *p;
1616 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001617 size_t sz = 0;
1618 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001619 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620
Tim Peters19fe14e2001-01-19 03:03:47 +00001621 seq = PySequence_Fast(orig, "");
1622 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001623 if (PyErr_ExceptionMatches(PyExc_TypeError))
1624 PyErr_Format(PyExc_TypeError,
1625 "sequence expected, %.80s found",
1626 orig->ob_type->tp_name);
1627 return NULL;
1628 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001629
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001630 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001631 if (seqlen == 0) {
1632 Py_DECREF(seq);
1633 return PyString_FromString("");
1634 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001636 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001637 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1638 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001639 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001640 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001641 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001643
Raymond Hettinger674f2412004-08-23 23:23:54 +00001644 /* There are at least two things to join, or else we have a subclass
1645 * of the builtin types in the sequence.
1646 * Do a pre-pass to figure out the total amount of space we'll
1647 * need (sz), see whether any argument is absurd, and defer to
1648 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001649 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001650 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001651 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001652 item = PySequence_Fast_GET_ITEM(seq, i);
1653 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001654#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001655 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001656 /* Defer to Unicode join.
1657 * CAUTION: There's no gurantee that the
1658 * original sequence can be iterated over
1659 * again, so we must pass seq here.
1660 */
1661 PyObject *result;
1662 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001663 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001664 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001665 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001666#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001667 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001668 "sequence item %i: expected string,"
1669 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001670 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001671 Py_DECREF(seq);
1672 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001673 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001674 sz += PyString_GET_SIZE(item);
1675 if (i != 0)
1676 sz += seplen;
1677 if (sz < old_sz || sz > INT_MAX) {
1678 PyErr_SetString(PyExc_OverflowError,
1679 "join() is too long for a Python string");
1680 Py_DECREF(seq);
1681 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001683 }
1684
1685 /* Allocate result space. */
1686 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1687 if (res == NULL) {
1688 Py_DECREF(seq);
1689 return NULL;
1690 }
1691
1692 /* Catenate everything. */
1693 p = PyString_AS_STRING(res);
1694 for (i = 0; i < seqlen; ++i) {
1695 size_t n;
1696 item = PySequence_Fast_GET_ITEM(seq, i);
1697 n = PyString_GET_SIZE(item);
1698 memcpy(p, PyString_AS_STRING(item), n);
1699 p += n;
1700 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001701 memcpy(p, sep, seplen);
1702 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001703 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001705
Jeremy Hylton49048292000-07-11 03:28:17 +00001706 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001708}
1709
Tim Peters52e155e2001-06-16 05:42:57 +00001710PyObject *
1711_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001712{
Tim Petersa7259592001-06-16 05:11:17 +00001713 assert(sep != NULL && PyString_Check(sep));
1714 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001715 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001716}
1717
/* Normalize a (start, end) pair against a string of length `len`,
   in place.

   *end:   values above len become len; negative values have len added
           and, if still negative, become 0.
   *start: negative values have len added and, if still negative,
           become 0.  Values above len are left untouched. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int bound;

	/* Finish with *end before looking at *start. */
	bound = *end;
	if (bound > len)
		bound = len;
	else if (bound < 0)
		bound += len;
	*end = (bound < 0) ? 0 : bound;

	bound = *start;
	if (bound < 0)
		bound += len;
	*start = (bound < 0) ? 0 : bound;
}
1732
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733static long
Fred Drakeba096332000-07-09 07:04:36 +00001734string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001736 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737 int len = PyString_GET_SIZE(self);
1738 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001739 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001741 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001742 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001743 return -2;
1744 if (PyString_Check(subobj)) {
1745 sub = PyString_AS_STRING(subobj);
1746 n = PyString_GET_SIZE(subobj);
1747 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001748#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001749 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001750 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001751#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001753 return -2;
1754
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001755 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756
Guido van Rossum4c08d552000-03-10 22:55:18 +00001757 if (dir > 0) {
1758 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 last -= n;
1761 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001762 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001763 return (long)i;
1764 }
1765 else {
1766 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001767
Guido van Rossum4c08d552000-03-10 22:55:18 +00001768 if (n == 0 && i <= last)
1769 return (long)last;
1770 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001771 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001772 return (long)j;
1773 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001774
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775 return -1;
1776}
1777
1778
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001779PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780"S.find(sub [,start [,end]]) -> int\n\
1781\n\
1782Return the lowest index in S where substring sub is found,\n\
1783such that sub is contained within s[start,end]. Optional\n\
1784arguments start and end are interpreted as in slice notation.\n\
1785\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001786Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787
1788static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001789string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001791 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792 if (result == -2)
1793 return NULL;
1794 return PyInt_FromLong(result);
1795}
1796
1797
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001798PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799"S.index(sub [,start [,end]]) -> int\n\
1800\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001801Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802
1803static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001804string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001806 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807 if (result == -2)
1808 return NULL;
1809 if (result == -1) {
1810 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001811 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812 return NULL;
1813 }
1814 return PyInt_FromLong(result);
1815}
1816
1817
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001818PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819"S.rfind(sub [,start [,end]]) -> int\n\
1820\n\
1821Return the highest index in S where substring sub is found,\n\
1822such that sub is contained within s[start,end]. Optional\n\
1823arguments start and end are interpreted as in slice notation.\n\
1824\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001825Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826
1827static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001828string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001830 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 if (result == -2)
1832 return NULL;
1833 return PyInt_FromLong(result);
1834}
1835
1836
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001837PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838"S.rindex(sub [,start [,end]]) -> int\n\
1839\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001840Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841
1842static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001843string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001845 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846 if (result == -2)
1847 return NULL;
1848 if (result == -1) {
1849 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001850 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851 return NULL;
1852 }
1853 return PyInt_FromLong(result);
1854}
1855
1856
1857static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001858do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1859{
1860 char *s = PyString_AS_STRING(self);
1861 int len = PyString_GET_SIZE(self);
1862 char *sep = PyString_AS_STRING(sepobj);
1863 int seplen = PyString_GET_SIZE(sepobj);
1864 int i, j;
1865
1866 i = 0;
1867 if (striptype != RIGHTSTRIP) {
1868 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1869 i++;
1870 }
1871 }
1872
1873 j = len;
1874 if (striptype != LEFTSTRIP) {
1875 do {
1876 j--;
1877 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1878 j++;
1879 }
1880
1881 if (i == 0 && j == len && PyString_CheckExact(self)) {
1882 Py_INCREF(self);
1883 return (PyObject*)self;
1884 }
1885 else
1886 return PyString_FromStringAndSize(s+i, j-i);
1887}
1888
1889
1890static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001891do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892{
1893 char *s = PyString_AS_STRING(self);
1894 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896 i = 0;
1897 if (striptype != RIGHTSTRIP) {
1898 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1899 i++;
1900 }
1901 }
1902
1903 j = len;
1904 if (striptype != LEFTSTRIP) {
1905 do {
1906 j--;
1907 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1908 j++;
1909 }
1910
Tim Peters8fa5dd02001-09-12 02:18:30 +00001911 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912 Py_INCREF(self);
1913 return (PyObject*)self;
1914 }
1915 else
1916 return PyString_FromStringAndSize(s+i, j-i);
1917}
1918
1919
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001920static PyObject *
1921do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1922{
1923 PyObject *sep = NULL;
1924
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001925 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001926 return NULL;
1927
1928 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001929 if (PyString_Check(sep))
1930 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001931#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001932 else if (PyUnicode_Check(sep)) {
1933 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1934 PyObject *res;
1935 if (uniself==NULL)
1936 return NULL;
1937 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1938 striptype, sep);
1939 Py_DECREF(uniself);
1940 return res;
1941 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001942#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001943 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001944 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001945#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001946 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001947#else
1948 "%s arg must be None or str",
1949#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001950 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001951 return NULL;
1952 }
1953 return do_xstrip(self, striptype, sep);
1954 }
1955
1956 return do_strip(self, striptype);
1957}
1958
1959
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001960PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001961"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962\n\
1963Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001964whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001965If chars is given and not None, remove characters in chars instead.\n\
1966If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967
1968static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001971 if (PyTuple_GET_SIZE(args) == 0)
1972 return do_strip(self, BOTHSTRIP); /* Common case */
1973 else
1974 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975}
1976
1977
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001978PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001979"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001981Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001982If chars is given and not None, remove characters in chars instead.\n\
1983If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984
1985static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001986string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001988 if (PyTuple_GET_SIZE(args) == 0)
1989 return do_strip(self, LEFTSTRIP); /* Common case */
1990 else
1991 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992}
1993
1994
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001995PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001996"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001998Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001999If chars is given and not None, remove characters in chars instead.\n\
2000If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001
2002static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002003string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002005 if (PyTuple_GET_SIZE(args) == 0)
2006 return do_strip(self, RIGHTSTRIP); /* Common case */
2007 else
2008 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009}
2010
2011
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002012PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013"S.lower() -> string\n\
2014\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002015Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016
2017static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002018string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019{
2020 char *s = PyString_AS_STRING(self), *s_new;
2021 int i, n = PyString_GET_SIZE(self);
2022 PyObject *new;
2023
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024 new = PyString_FromStringAndSize(NULL, n);
2025 if (new == NULL)
2026 return NULL;
2027 s_new = PyString_AsString(new);
2028 for (i = 0; i < n; i++) {
2029 int c = Py_CHARMASK(*s++);
2030 if (isupper(c)) {
2031 *s_new = tolower(c);
2032 } else
2033 *s_new = c;
2034 s_new++;
2035 }
2036 return new;
2037}
2038
2039
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002040PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041"S.upper() -> string\n\
2042\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002043Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044
2045static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002046string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047{
2048 char *s = PyString_AS_STRING(self), *s_new;
2049 int i, n = PyString_GET_SIZE(self);
2050 PyObject *new;
2051
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052 new = PyString_FromStringAndSize(NULL, n);
2053 if (new == NULL)
2054 return NULL;
2055 s_new = PyString_AsString(new);
2056 for (i = 0; i < n; i++) {
2057 int c = Py_CHARMASK(*s++);
2058 if (islower(c)) {
2059 *s_new = toupper(c);
2060 } else
2061 *s_new = c;
2062 s_new++;
2063 }
2064 return new;
2065}
2066
2067
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002068PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069"S.title() -> string\n\
2070\n\
2071Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002072characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002073
2074static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002075string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002076{
2077 char *s = PyString_AS_STRING(self), *s_new;
2078 int i, n = PyString_GET_SIZE(self);
2079 int previous_is_cased = 0;
2080 PyObject *new;
2081
Guido van Rossum4c08d552000-03-10 22:55:18 +00002082 new = PyString_FromStringAndSize(NULL, n);
2083 if (new == NULL)
2084 return NULL;
2085 s_new = PyString_AsString(new);
2086 for (i = 0; i < n; i++) {
2087 int c = Py_CHARMASK(*s++);
2088 if (islower(c)) {
2089 if (!previous_is_cased)
2090 c = toupper(c);
2091 previous_is_cased = 1;
2092 } else if (isupper(c)) {
2093 if (previous_is_cased)
2094 c = tolower(c);
2095 previous_is_cased = 1;
2096 } else
2097 previous_is_cased = 0;
2098 *s_new++ = c;
2099 }
2100 return new;
2101}
2102
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002103PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104"S.capitalize() -> string\n\
2105\n\
2106Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002107capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108
2109static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002110string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111{
2112 char *s = PyString_AS_STRING(self), *s_new;
2113 int i, n = PyString_GET_SIZE(self);
2114 PyObject *new;
2115
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116 new = PyString_FromStringAndSize(NULL, n);
2117 if (new == NULL)
2118 return NULL;
2119 s_new = PyString_AsString(new);
2120 if (0 < n) {
2121 int c = Py_CHARMASK(*s++);
2122 if (islower(c))
2123 *s_new = toupper(c);
2124 else
2125 *s_new = c;
2126 s_new++;
2127 }
2128 for (i = 1; i < n; i++) {
2129 int c = Py_CHARMASK(*s++);
2130 if (isupper(c))
2131 *s_new = tolower(c);
2132 else
2133 *s_new = c;
2134 s_new++;
2135 }
2136 return new;
2137}
2138
2139
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002140PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141"S.count(sub[, start[, end]]) -> int\n\
2142\n\
2143Return the number of occurrences of substring sub in string\n\
2144S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146
2147static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002148string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002150 const char *s = PyString_AS_STRING(self), *sub, *t;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151 int len = PyString_GET_SIZE(self), n;
2152 int i = 0, last = INT_MAX;
2153 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002154 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155
Guido van Rossumc6821402000-05-08 14:08:05 +00002156 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2157 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002159
Guido van Rossum4c08d552000-03-10 22:55:18 +00002160 if (PyString_Check(subobj)) {
2161 sub = PyString_AS_STRING(subobj);
2162 n = PyString_GET_SIZE(subobj);
2163 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002164#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002165 else if (PyUnicode_Check(subobj)) {
2166 int count;
2167 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2168 if (count == -1)
2169 return NULL;
2170 else
2171 return PyInt_FromLong((long) count);
2172 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002173#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2175 return NULL;
2176
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002177 string_adjust_indices(&i, &last, len);
2178
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179 m = last + 1 - n;
2180 if (n == 0)
2181 return PyInt_FromLong((long) (m-i));
2182
2183 r = 0;
2184 while (i < m) {
2185 if (!memcmp(s+i, sub, n)) {
2186 r++;
2187 i += n;
2188 } else {
2189 i++;
2190 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002191 if (i >= m)
2192 break;
2193 t = memchr(s+i, sub[0], m-i);
2194 if (t == NULL)
2195 break;
2196 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002197 }
2198 return PyInt_FromLong((long) r);
2199}
2200
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002201PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202"S.swapcase() -> string\n\
2203\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002205converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002206
2207static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002208string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209{
2210 char *s = PyString_AS_STRING(self), *s_new;
2211 int i, n = PyString_GET_SIZE(self);
2212 PyObject *new;
2213
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214 new = PyString_FromStringAndSize(NULL, n);
2215 if (new == NULL)
2216 return NULL;
2217 s_new = PyString_AsString(new);
2218 for (i = 0; i < n; i++) {
2219 int c = Py_CHARMASK(*s++);
2220 if (islower(c)) {
2221 *s_new = toupper(c);
2222 }
2223 else if (isupper(c)) {
2224 *s_new = tolower(c);
2225 }
2226 else
2227 *s_new = c;
2228 s_new++;
2229 }
2230 return new;
2231}
2232
2233
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002234PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002235"S.translate(table [,deletechars]) -> string\n\
2236\n\
2237Return a copy of the string S, where all characters occurring\n\
2238in the optional argument deletechars are removed, and the\n\
2239remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002240translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241
2242static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002243string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245 register char *input, *output;
2246 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 register int i, c, changed = 0;
2248 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250 int inlen, tablen, dellen = 0;
2251 PyObject *result;
2252 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002255 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002258
2259 if (PyString_Check(tableobj)) {
2260 table1 = PyString_AS_STRING(tableobj);
2261 tablen = PyString_GET_SIZE(tableobj);
2262 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002263#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002265 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266 parameter; instead a mapping to None will cause characters
2267 to be deleted. */
2268 if (delobj != NULL) {
2269 PyErr_SetString(PyExc_TypeError,
2270 "deletions are implemented differently for unicode");
2271 return NULL;
2272 }
2273 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2274 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002275#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278
Martin v. Löwis00b61272002-12-12 20:03:19 +00002279 if (tablen != 256) {
2280 PyErr_SetString(PyExc_ValueError,
2281 "translation table must be 256 characters long");
2282 return NULL;
2283 }
2284
Guido van Rossum4c08d552000-03-10 22:55:18 +00002285 if (delobj != NULL) {
2286 if (PyString_Check(delobj)) {
2287 del_table = PyString_AS_STRING(delobj);
2288 dellen = PyString_GET_SIZE(delobj);
2289 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002290#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002291 else if (PyUnicode_Check(delobj)) {
2292 PyErr_SetString(PyExc_TypeError,
2293 "deletions are implemented differently for unicode");
2294 return NULL;
2295 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002296#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002297 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2298 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002299 }
2300 else {
2301 del_table = NULL;
2302 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303 }
2304
2305 table = table1;
2306 inlen = PyString_Size(input_obj);
2307 result = PyString_FromStringAndSize((char *)NULL, inlen);
2308 if (result == NULL)
2309 return NULL;
2310 output_start = output = PyString_AsString(result);
2311 input = PyString_AsString(input_obj);
2312
2313 if (dellen == 0) {
2314 /* If no deletions are required, use faster code */
2315 for (i = inlen; --i >= 0; ) {
2316 c = Py_CHARMASK(*input++);
2317 if (Py_CHARMASK((*output++ = table[c])) != c)
2318 changed = 1;
2319 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002320 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 return result;
2322 Py_DECREF(result);
2323 Py_INCREF(input_obj);
2324 return input_obj;
2325 }
2326
2327 for (i = 0; i < 256; i++)
2328 trans_table[i] = Py_CHARMASK(table[i]);
2329
2330 for (i = 0; i < dellen; i++)
2331 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2332
2333 for (i = inlen; --i >= 0; ) {
2334 c = Py_CHARMASK(*input++);
2335 if (trans_table[c] != -1)
2336 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2337 continue;
2338 changed = 1;
2339 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002340 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002341 Py_DECREF(result);
2342 Py_INCREF(input_obj);
2343 return input_obj;
2344 }
2345 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002346 if (inlen > 0)
2347 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002348 return result;
2349}
2350
2351
2352/* What follows is used for implementing replace(). Perry Stoll. */
2353
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the LEN bytes pointed to by MEM of the
  PAT_LEN bytes pointed to by PAT.  Returns the index into MEM if
  found, or -1 if not found.  If PAT_LEN is greater than LEN, or either
  length is negative, the function returns -1.  An empty pattern
  (PAT_LEN == 0) is found at index 0, as with strstr; neither buffer is
  read in that case.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	register int ii;
	int last_start;

	if (pat_len < 0 || len < pat_len)
		return -1;
	/* Do not touch pat[0] or mem[len] for an empty pattern. */
	if (pat_len == 0)
		return 0;

	/* pattern can not occur in the last pat_len-1 chars */
	last_start = len - pat_len;

	for (ii = 0; ii <= last_start; ii++) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
			return ii;
		}
	}
	return -1;
}
2379
/*
   mymemcnt

   Return the number of distinct (non-overlapping) times PAT is found
   in MEM.
   meaning mem=1111 and pat==11 returns 2.
           mem=11111 and pat==11 also return 2.

   An empty pattern (PAT_LEN == 0) counts as LEN + 1 occurrences, the
   same value mymemreplace() uses for that case.  A negative LEN or
   PAT_LEN yields 0.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	register int offset = 0;
	int nfound = 0;

	if (len < 0 || pat_len < 0)
		return 0;
	/* The loop below advances by offset + pat_len per match, so it
	   could never terminate on an empty pattern; answer directly. */
	if (pat_len == 0)
		return len + 1;

	while (len >= 0) {
		offset = mymemfind(mem, len, pat, pat_len);
		if (offset == -1)
			break;
		/* Continue right after this match. */
		mem += offset + pat_len;
		len -= offset + pat_len;
		nfound++;
	}
	return nfound;
}
2403
/*
  mymemreplace

  Return a string in which all occurrences of PAT in memory STR are
  replaced with SUB.  At most COUNT replacements are made; a negative
  COUNT means no limit.  An empty PAT matches in front of every byte
  and once more after the last byte.

  If STR is empty, PAT and SUB are both empty, length of PAT is greater
  than length of STR, or there is nothing to replace, then the original
  string is returned.  Otherwise, a new string is allocated here with
  PyMem_MALLOC and returned; the caller releases it with PyMem_FREE.

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs (no memory, or the result length
      would not fit in an int).

  return value is:
      the new string allocated locally, or
      the input string itself (when out_len is -1), or
      NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
	     const char *pat, int pat_len,	/* pattern string to find */
	     const char *sub, int sub_len,	/* substitution string */
	     int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	if (pat_len > 0)
		nfound = mymemcnt(str, len, pat, pat_len);
	else
		/* empty pattern: one match per gap; clamp so that
		   len + 1 cannot wrap around */
		nfound = (len < INT_MAX) ? len + 1 : INT_MAX;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Only a growing replacement can overflow: refuse it before the
	   multiplication wraps and an undersized buffer gets allocated.
	   When sub_len <= pat_len the result is no longer than the input. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* empty pattern: emit SUB, then one input byte,
			   until the replacement budget is used up */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					/* budget exhausted: copy the tail */
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2501
2502
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002503PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002504"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002505\n\
2506Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002507old replaced by new. If the optional argument count is\n\
2508given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002509
2510static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002511string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002512{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 const char *str = PyString_AS_STRING(self), *sub, *repl;
2514 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002515 const int len = PyString_GET_SIZE(self);
2516 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002518 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002519 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002520
Guido van Rossum4c08d552000-03-10 22:55:18 +00002521 if (!PyArg_ParseTuple(args, "OO|i:replace",
2522 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002523 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002524
2525 if (PyString_Check(subobj)) {
2526 sub = PyString_AS_STRING(subobj);
2527 sub_len = PyString_GET_SIZE(subobj);
2528 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002529#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002531 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002533#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002534 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2535 return NULL;
2536
2537 if (PyString_Check(replobj)) {
2538 repl = PyString_AS_STRING(replobj);
2539 repl_len = PyString_GET_SIZE(replobj);
2540 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002541#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002543 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002545#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2547 return NULL;
2548
Guido van Rossum4c08d552000-03-10 22:55:18 +00002549 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002550 if (new_s == NULL) {
2551 PyErr_NoMemory();
2552 return NULL;
2553 }
2554 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002555 if (PyString_CheckExact(self)) {
2556 /* we're returning another reference to self */
2557 new = (PyObject*)self;
2558 Py_INCREF(new);
2559 }
2560 else {
2561 new = PyString_FromStringAndSize(str, len);
2562 if (new == NULL)
2563 return NULL;
2564 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565 }
2566 else {
2567 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002568 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002569 }
2570 return new;
2571}
2572
2573
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002574PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002575"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002576\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002577Return True if S starts with the specified prefix, False otherwise.\n\
2578With optional start, test S beginning at that position.\n\
2579With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002580
2581static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002582string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002583{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002585 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002587 int plen;
2588 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002589 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002591
Guido van Rossumc6821402000-05-08 14:08:05 +00002592 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2593 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594 return NULL;
2595 if (PyString_Check(subobj)) {
2596 prefix = PyString_AS_STRING(subobj);
2597 plen = PyString_GET_SIZE(subobj);
2598 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002599#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002600 else if (PyUnicode_Check(subobj)) {
2601 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002602 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002603 subobj, start, end, -1);
2604 if (rc == -1)
2605 return NULL;
2606 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002607 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002608 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002609#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002610 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002611 return NULL;
2612
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002613 string_adjust_indices(&start, &end, len);
2614
2615 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002616 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002617
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002618 if (end-start >= plen)
2619 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2620 else
2621 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002622}
2623
2624
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002625PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002626"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002627\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002628Return True if S ends with the specified suffix, False otherwise.\n\
2629With optional start, test S beginning at that position.\n\
2630With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002631
2632static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002633string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002634{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002636 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002637 const char* suffix;
2638 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002639 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002640 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002642
Guido van Rossumc6821402000-05-08 14:08:05 +00002643 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2644 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002645 return NULL;
2646 if (PyString_Check(subobj)) {
2647 suffix = PyString_AS_STRING(subobj);
2648 slen = PyString_GET_SIZE(subobj);
2649 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002650#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002651 else if (PyUnicode_Check(subobj)) {
2652 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002653 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002654 subobj, start, end, +1);
2655 if (rc == -1)
2656 return NULL;
2657 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002658 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002659 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002660#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002661 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002662 return NULL;
2663
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002664 string_adjust_indices(&start, &end, len);
2665
2666 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002667 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002668
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002669 if (end-slen > start)
2670 start = end - slen;
2671 if (end-start >= slen)
2672 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2673 else
2674 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002675}
2676
2677
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002678PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002679"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002680\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002681Encodes S using the codec registered for encoding. encoding defaults\n\
2682to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002683handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002684a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2685'xmlcharrefreplace' as well as any other name registered with\n\
2686codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002687
2688static PyObject *
2689string_encode(PyStringObject *self, PyObject *args)
2690{
2691 char *encoding = NULL;
2692 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002693 PyObject *v;
2694
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002695 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2696 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002697 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002698 if (v == NULL)
2699 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002700 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2701 PyErr_Format(PyExc_TypeError,
2702 "encoder did not return a string/unicode object "
2703 "(type=%.400s)",
2704 v->ob_type->tp_name);
2705 Py_DECREF(v);
2706 return NULL;
2707 }
2708 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002709
2710 onError:
2711 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002712}
2713
2714
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002715PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002716"S.decode([encoding[,errors]]) -> object\n\
2717\n\
2718Decodes S using the codec registered for encoding. encoding defaults\n\
2719to the default encoding. errors may be given to set a different error\n\
2720handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002721a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2722as well as any other name registerd with codecs.register_error that is\n\
2723able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002724
2725static PyObject *
2726string_decode(PyStringObject *self, PyObject *args)
2727{
2728 char *encoding = NULL;
2729 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002730 PyObject *v;
2731
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002732 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2733 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002734 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002735 if (v == NULL)
2736 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002737 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2738 PyErr_Format(PyExc_TypeError,
2739 "decoder did not return a string/unicode object "
2740 "(type=%.400s)",
2741 v->ob_type->tp_name);
2742 Py_DECREF(v);
2743 return NULL;
2744 }
2745 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002746
2747 onError:
2748 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002749}
2750
2751
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002752PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002753"S.expandtabs([tabsize]) -> string\n\
2754\n\
2755Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002756If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002757
2758static PyObject*
2759string_expandtabs(PyStringObject *self, PyObject *args)
2760{
2761 const char *e, *p;
2762 char *q;
2763 int i, j;
2764 PyObject *u;
2765 int tabsize = 8;
2766
2767 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2768 return NULL;
2769
Thomas Wouters7e474022000-07-16 12:04:32 +00002770 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002771 i = j = 0;
2772 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2773 for (p = PyString_AS_STRING(self); p < e; p++)
2774 if (*p == '\t') {
2775 if (tabsize > 0)
2776 j += tabsize - (j % tabsize);
2777 }
2778 else {
2779 j++;
2780 if (*p == '\n' || *p == '\r') {
2781 i += j;
2782 j = 0;
2783 }
2784 }
2785
2786 /* Second pass: create output string and fill it */
2787 u = PyString_FromStringAndSize(NULL, i + j);
2788 if (!u)
2789 return NULL;
2790
2791 j = 0;
2792 q = PyString_AS_STRING(u);
2793
2794 for (p = PyString_AS_STRING(self); p < e; p++)
2795 if (*p == '\t') {
2796 if (tabsize > 0) {
2797 i = tabsize - (j % tabsize);
2798 j += i;
2799 while (i--)
2800 *q++ = ' ';
2801 }
2802 }
2803 else {
2804 j++;
2805 *q++ = *p;
2806 if (*p == '\n' || *p == '\r')
2807 j = 0;
2808 }
2809
2810 return u;
2811}
2812
Tim Peters8fa5dd02001-09-12 02:18:30 +00002813static PyObject *
2814pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002815{
2816 PyObject *u;
2817
2818 if (left < 0)
2819 left = 0;
2820 if (right < 0)
2821 right = 0;
2822
Tim Peters8fa5dd02001-09-12 02:18:30 +00002823 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002824 Py_INCREF(self);
2825 return (PyObject *)self;
2826 }
2827
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002828 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002829 left + PyString_GET_SIZE(self) + right);
2830 if (u) {
2831 if (left)
2832 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002833 memcpy(PyString_AS_STRING(u) + left,
2834 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002835 PyString_GET_SIZE(self));
2836 if (right)
2837 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2838 fill, right);
2839 }
2840
2841 return u;
2842}
2843
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002844PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002845"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002846"\n"
2847"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002848"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002849
2850static PyObject *
2851string_ljust(PyStringObject *self, PyObject *args)
2852{
2853 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002854 char fillchar = ' ';
2855
2856 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002857 return NULL;
2858
Tim Peters8fa5dd02001-09-12 02:18:30 +00002859 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002860 Py_INCREF(self);
2861 return (PyObject*) self;
2862 }
2863
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002864 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002865}
2866
2867
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002868PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002869"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002870"\n"
2871"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002872"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002873
2874static PyObject *
2875string_rjust(PyStringObject *self, PyObject *args)
2876{
2877 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002878 char fillchar = ' ';
2879
2880 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881 return NULL;
2882
Tim Peters8fa5dd02001-09-12 02:18:30 +00002883 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002884 Py_INCREF(self);
2885 return (PyObject*) self;
2886 }
2887
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002888 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002889}
2890
2891
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002892PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002893"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002894"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002895"Return S centered in a string of length width. Padding is\n"
2896"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002897
2898static PyObject *
2899string_center(PyStringObject *self, PyObject *args)
2900{
2901 int marg, left;
2902 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002903 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002904
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002905 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002906 return NULL;
2907
Tim Peters8fa5dd02001-09-12 02:18:30 +00002908 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002909 Py_INCREF(self);
2910 return (PyObject*) self;
2911 }
2912
2913 marg = width - PyString_GET_SIZE(self);
2914 left = marg / 2 + (marg & width & 1);
2915
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002916 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002917}
2918
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002919PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002920"S.zfill(width) -> string\n"
2921"\n"
2922"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002923"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002924
2925static PyObject *
2926string_zfill(PyStringObject *self, PyObject *args)
2927{
2928 int fill;
2929 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002930 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002931
2932 int width;
2933 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2934 return NULL;
2935
2936 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002937 if (PyString_CheckExact(self)) {
2938 Py_INCREF(self);
2939 return (PyObject*) self;
2940 }
2941 else
2942 return PyString_FromStringAndSize(
2943 PyString_AS_STRING(self),
2944 PyString_GET_SIZE(self)
2945 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002946 }
2947
2948 fill = width - PyString_GET_SIZE(self);
2949
2950 s = pad(self, fill, 0, '0');
2951
2952 if (s == NULL)
2953 return NULL;
2954
2955 p = PyString_AS_STRING(s);
2956 if (p[fill] == '+' || p[fill] == '-') {
2957 /* move sign to beginning of string */
2958 p[0] = p[fill];
2959 p[fill] = '0';
2960 }
2961
2962 return (PyObject*) s;
2963}
2964
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002965PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002966"S.isspace() -> bool\n\
2967\n\
2968Return True if all characters in S are whitespace\n\
2969and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002970
2971static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002972string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002973{
Fred Drakeba096332000-07-09 07:04:36 +00002974 register const unsigned char *p
2975 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002976 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002977
Guido van Rossum4c08d552000-03-10 22:55:18 +00002978 /* Shortcut for single character strings */
2979 if (PyString_GET_SIZE(self) == 1 &&
2980 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002981 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002982
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002983 /* Special case for empty strings */
2984 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002985 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002986
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987 e = p + PyString_GET_SIZE(self);
2988 for (; p < e; p++) {
2989 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002990 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002991 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002992 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002993}
2994
2995
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002996PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002997"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002998\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002999Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003000and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003001
3002static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003003string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003004{
Fred Drakeba096332000-07-09 07:04:36 +00003005 register const unsigned char *p
3006 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003007 register const unsigned char *e;
3008
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003009 /* Shortcut for single character strings */
3010 if (PyString_GET_SIZE(self) == 1 &&
3011 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003012 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003013
3014 /* Special case for empty strings */
3015 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003016 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003017
3018 e = p + PyString_GET_SIZE(self);
3019 for (; p < e; p++) {
3020 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003021 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003022 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003023 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003024}
3025
3026
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003027PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003028"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003029\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003030Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003031and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003032
3033static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003034string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003035{
Fred Drakeba096332000-07-09 07:04:36 +00003036 register const unsigned char *p
3037 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003038 register const unsigned char *e;
3039
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003040 /* Shortcut for single character strings */
3041 if (PyString_GET_SIZE(self) == 1 &&
3042 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003043 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003044
3045 /* Special case for empty strings */
3046 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003047 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003048
3049 e = p + PyString_GET_SIZE(self);
3050 for (; p < e; p++) {
3051 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003052 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003053 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003054 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003055}
3056
3057
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003058PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003059"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003060\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003061Return True if all characters in S are digits\n\
3062and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063
3064static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003065string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003066{
Fred Drakeba096332000-07-09 07:04:36 +00003067 register const unsigned char *p
3068 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003069 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071 /* Shortcut for single character strings */
3072 if (PyString_GET_SIZE(self) == 1 &&
3073 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003074 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003076 /* Special case for empty strings */
3077 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003078 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003079
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080 e = p + PyString_GET_SIZE(self);
3081 for (; p < e; p++) {
3082 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003083 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003085 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086}
3087
3088
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003089PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003090"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003091\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003092Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003093at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003094
3095static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003096string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003097{
Fred Drakeba096332000-07-09 07:04:36 +00003098 register const unsigned char *p
3099 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003100 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101 int cased;
3102
Guido van Rossum4c08d552000-03-10 22:55:18 +00003103 /* Shortcut for single character strings */
3104 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003105 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003106
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003107 /* Special case for empty strings */
3108 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003109 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003110
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111 e = p + PyString_GET_SIZE(self);
3112 cased = 0;
3113 for (; p < e; p++) {
3114 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003115 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003116 else if (!cased && islower(*p))
3117 cased = 1;
3118 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003119 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003120}
3121
3122
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003123PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003124"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003125\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003126Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003127at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003128
3129static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003130string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003131{
Fred Drakeba096332000-07-09 07:04:36 +00003132 register const unsigned char *p
3133 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003134 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003135 int cased;
3136
Guido van Rossum4c08d552000-03-10 22:55:18 +00003137 /* Shortcut for single character strings */
3138 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003139 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003141 /* Special case for empty strings */
3142 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003143 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003144
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145 e = p + PyString_GET_SIZE(self);
3146 cased = 0;
3147 for (; p < e; p++) {
3148 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003149 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150 else if (!cased && isupper(*p))
3151 cased = 1;
3152 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003153 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154}
3155
3156
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003157PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003158"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003159\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003160Return True if S is a titlecased string and there is at least one\n\
3161character in S, i.e. uppercase characters may only follow uncased\n\
3162characters and lowercase characters only cased ones. Return False\n\
3163otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003164
3165static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003166string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167{
Fred Drakeba096332000-07-09 07:04:36 +00003168 register const unsigned char *p
3169 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003170 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003171 int cased, previous_is_cased;
3172
Guido van Rossum4c08d552000-03-10 22:55:18 +00003173 /* Shortcut for single character strings */
3174 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003175 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003177 /* Special case for empty strings */
3178 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003179 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003180
Guido van Rossum4c08d552000-03-10 22:55:18 +00003181 e = p + PyString_GET_SIZE(self);
3182 cased = 0;
3183 previous_is_cased = 0;
3184 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003185 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186
3187 if (isupper(ch)) {
3188 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003189 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003190 previous_is_cased = 1;
3191 cased = 1;
3192 }
3193 else if (islower(ch)) {
3194 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003195 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196 previous_is_cased = 1;
3197 cased = 1;
3198 }
3199 else
3200 previous_is_cased = 0;
3201 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003202 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003203}
3204
3205
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003206PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003207"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208\n\
3209Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003210Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003211is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003212
Guido van Rossum4c08d552000-03-10 22:55:18 +00003213static PyObject*
3214string_splitlines(PyStringObject *self, PyObject *args)
3215{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 register int i;
3217 register int j;
3218 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003219 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003220 PyObject *list;
3221 PyObject *str;
3222 char *data;
3223
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003224 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003225 return NULL;
3226
3227 data = PyString_AS_STRING(self);
3228 len = PyString_GET_SIZE(self);
3229
Guido van Rossum4c08d552000-03-10 22:55:18 +00003230 list = PyList_New(0);
3231 if (!list)
3232 goto onError;
3233
3234 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003235 int eol;
3236
Guido van Rossum4c08d552000-03-10 22:55:18 +00003237 /* Find a line and append it */
3238 while (i < len && data[i] != '\n' && data[i] != '\r')
3239 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003240
3241 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003242 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003243 if (i < len) {
3244 if (data[i] == '\r' && i + 1 < len &&
3245 data[i+1] == '\n')
3246 i += 2;
3247 else
3248 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003249 if (keepends)
3250 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003251 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003252 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003253 j = i;
3254 }
3255 if (j < len) {
3256 SPLIT_APPEND(data, j, len);
3257 }
3258
3259 return list;
3260
3261 onError:
3262 Py_DECREF(list);
3263 return NULL;
3264}
3265
3266#undef SPLIT_APPEND
3267
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003268static PyObject *
3269string_getnewargs(PyStringObject *v)
3270{
3271 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3272}
3273
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003274
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003275static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003276string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003277 /* Counterparts of the obsolete stropmodule functions; except
3278 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003279 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3280 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003281 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003282 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3283 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003284 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3285 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3286 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3287 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3288 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3289 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3290 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003291 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3292 capitalize__doc__},
3293 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3294 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3295 endswith__doc__},
3296 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3297 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3298 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3299 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3300 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3301 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3302 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3303 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3304 startswith__doc__},
3305 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3306 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3307 swapcase__doc__},
3308 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3309 translate__doc__},
3310 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3311 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3312 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3313 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3314 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3315 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3316 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3317 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3318 expandtabs__doc__},
3319 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3320 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003321 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003322 {NULL, NULL} /* sentinel */
3323};
3324
Jeremy Hylton938ace62002-07-17 16:30:39 +00003325static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003326str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3327
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003328static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003329string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003330{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003331 PyObject *x = NULL;
3332 static char *kwlist[] = {"object", 0};
3333
Guido van Rossumae960af2001-08-30 03:11:59 +00003334 if (type != &PyString_Type)
3335 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003336 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3337 return NULL;
3338 if (x == NULL)
3339 return PyString_FromString("");
3340 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003341}
3342
Guido van Rossumae960af2001-08-30 03:11:59 +00003343static PyObject *
3344str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3345{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003346 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003347 int n;
3348
3349 assert(PyType_IsSubtype(type, &PyString_Type));
3350 tmp = string_new(&PyString_Type, args, kwds);
3351 if (tmp == NULL)
3352 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003353 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003354 n = PyString_GET_SIZE(tmp);
3355 pnew = type->tp_alloc(type, n);
3356 if (pnew != NULL) {
3357 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003358 ((PyStringObject *)pnew)->ob_shash =
3359 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003360 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003361 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003362 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003363 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003364}
3365
Guido van Rossumcacfc072002-05-24 19:01:59 +00003366static PyObject *
3367basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3368{
3369 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003370 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003371 return NULL;
3372}
3373
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003374static PyObject *
3375string_mod(PyObject *v, PyObject *w)
3376{
3377 if (!PyString_Check(v)) {
3378 Py_INCREF(Py_NotImplemented);
3379 return Py_NotImplemented;
3380 }
3381 return PyString_Format(v, w);
3382}
3383
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003384PyDoc_STRVAR(basestring_doc,
3385"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003386
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003387static PyNumberMethods string_as_number = {
3388 0, /*nb_add*/
3389 0, /*nb_subtract*/
3390 0, /*nb_multiply*/
3391 0, /*nb_divide*/
3392 string_mod, /*nb_remainder*/
3393};
3394
3395
Guido van Rossumcacfc072002-05-24 19:01:59 +00003396PyTypeObject PyBaseString_Type = {
3397 PyObject_HEAD_INIT(&PyType_Type)
3398 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003399 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003400 0,
3401 0,
3402 0, /* tp_dealloc */
3403 0, /* tp_print */
3404 0, /* tp_getattr */
3405 0, /* tp_setattr */
3406 0, /* tp_compare */
3407 0, /* tp_repr */
3408 0, /* tp_as_number */
3409 0, /* tp_as_sequence */
3410 0, /* tp_as_mapping */
3411 0, /* tp_hash */
3412 0, /* tp_call */
3413 0, /* tp_str */
3414 0, /* tp_getattro */
3415 0, /* tp_setattro */
3416 0, /* tp_as_buffer */
3417 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3418 basestring_doc, /* tp_doc */
3419 0, /* tp_traverse */
3420 0, /* tp_clear */
3421 0, /* tp_richcompare */
3422 0, /* tp_weaklistoffset */
3423 0, /* tp_iter */
3424 0, /* tp_iternext */
3425 0, /* tp_methods */
3426 0, /* tp_members */
3427 0, /* tp_getset */
3428 &PyBaseObject_Type, /* tp_base */
3429 0, /* tp_dict */
3430 0, /* tp_descr_get */
3431 0, /* tp_descr_set */
3432 0, /* tp_dictoffset */
3433 0, /* tp_init */
3434 0, /* tp_alloc */
3435 basestring_new, /* tp_new */
3436 0, /* tp_free */
3437};
3438
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003439PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003440"str(object) -> string\n\
3441\n\
3442Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003443If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003444
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003445PyTypeObject PyString_Type = {
3446 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003447 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003448 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003449 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003450 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003451 (destructor)string_dealloc, /* tp_dealloc */
3452 (printfunc)string_print, /* tp_print */
3453 0, /* tp_getattr */
3454 0, /* tp_setattr */
3455 0, /* tp_compare */
3456 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003457 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003458 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003459 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003460 (hashfunc)string_hash, /* tp_hash */
3461 0, /* tp_call */
3462 (reprfunc)string_str, /* tp_str */
3463 PyObject_GenericGetAttr, /* tp_getattro */
3464 0, /* tp_setattro */
3465 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003466 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3467 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003468 string_doc, /* tp_doc */
3469 0, /* tp_traverse */
3470 0, /* tp_clear */
3471 (richcmpfunc)string_richcompare, /* tp_richcompare */
3472 0, /* tp_weaklistoffset */
3473 0, /* tp_iter */
3474 0, /* tp_iternext */
3475 string_methods, /* tp_methods */
3476 0, /* tp_members */
3477 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003478 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003479 0, /* tp_dict */
3480 0, /* tp_descr_get */
3481 0, /* tp_descr_set */
3482 0, /* tp_dictoffset */
3483 0, /* tp_init */
3484 0, /* tp_alloc */
3485 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003486 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003487};
3488
3489void
Fred Drakeba096332000-07-09 07:04:36 +00003490PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003491{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003492 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003493 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003494 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003495 if (w == NULL || !PyString_Check(*pv)) {
3496 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003497 *pv = NULL;
3498 return;
3499 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003500 v = string_concat((PyStringObject *) *pv, w);
3501 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003502 *pv = v;
3503}
3504
Guido van Rossum013142a1994-08-30 08:19:36 +00003505void
Fred Drakeba096332000-07-09 07:04:36 +00003506PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003507{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003508 PyString_Concat(pv, w);
3509 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003510}
3511
3512
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003513/* The following function breaks the notion that strings are immutable:
3514 it changes the size of a string. We get away with this only if there
3515 is only one module referencing the object. You can also think of it
3516 as creating a new string object and destroying the old one, only
3517 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003518 already be known to some other part of the code...
3519 Note that if there's not enough memory to resize the string, the original
3520 string object at *pv is deallocated, *pv is set to NULL, an "out of
3521 memory" exception is set, and -1 is returned. Else (on success) 0 is
3522 returned, and the value in *pv may or may not be the same as on input.
3523 As always, an extra byte is allocated for a trailing \0 byte (newsize
3524 does *not* include that), and a trailing \0 byte is stored.
3525*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003526
3527int
Fred Drakeba096332000-07-09 07:04:36 +00003528_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003529{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003530 register PyObject *v;
3531 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003532 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003533 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3534 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003535 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003536 Py_DECREF(v);
3537 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003538 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003539 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003540 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003541 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003542 _Py_ForgetReference(v);
3543 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003544 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003545 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003546 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003547 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003548 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003549 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003550 _Py_NewReference(*pv);
3551 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003552 sv->ob_size = newsize;
3553 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003554 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003555 return 0;
3556}
Guido van Rossume5372401993-03-16 12:15:04 +00003557
3558/* Helpers for formatstring */
3559
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003560static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003561getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003562{
3563 int argidx = *p_argidx;
3564 if (argidx < arglen) {
3565 (*p_argidx)++;
3566 if (arglen < 0)
3567 return args;
3568 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003569 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003570 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003571 PyErr_SetString(PyExc_TypeError,
3572 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003573 return NULL;
3574}
3575
/* Bit flags for the conversion flags of a format specifier.
 * Each flag character maps to one bit:
 *   '-'  F_LJUST
 *   '+'  F_SIGN
 *   ' '  F_BLANK
 *   '#'  F_ALT
 *   '0'  F_ZERO
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3588
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003589static int
Fred Drakeba096332000-07-09 07:04:36 +00003590formatfloat(char *buf, size_t buflen, int flags,
3591 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003592{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003593 /* fmt = '%#.' + `prec` + `type`
3594 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003595 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003596 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003597 x = PyFloat_AsDouble(v);
3598 if (x == -1.0 && PyErr_Occurred()) {
3599 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003600 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003601 }
Guido van Rossume5372401993-03-16 12:15:04 +00003602 if (prec < 0)
3603 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003604 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3605 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003606 /* Worst case length calc to ensure no buffer overrun:
3607
3608 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003609 fmt = %#.<prec>g
3610 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003611 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003612 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003613
3614 'f' formats:
3615 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3616 len = 1 + 50 + 1 + prec = 52 + prec
3617
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003618 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003619 always given), therefore increase the length by one.
3620
3621 */
3622 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3623 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003624 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003625 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003626 return -1;
3627 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003628 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3629 (flags&F_ALT) ? "#" : "",
3630 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003631 PyOS_ascii_formatd(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003632 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003633}
3634
Tim Peters38fd5b62000-09-21 05:43:11 +00003635/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3636 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3637 * Python's regular ints.
3638 * Return value: a new PyString*, or NULL if error.
3639 * . *pbuf is set to point into it,
3640 * *plen set to the # of chars following that.
3641 * Caller must decref it when done using pbuf.
3642 * The string starting at *pbuf is of the form
3643 * "-"? ("0x" | "0X")? digit+
3644 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003645 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003646 * There will be at least prec digits, zero-filled on the left if
3647 * necessary to get that many.
3648 * val object to be converted
3649 * flags bitmask of format flags; only F_ALT is looked at
3650 * prec minimum number of digits; 0-fill on left if needed
3651 * type a character in [duoxX]; u acts the same as d
3652 *
3653 * CAUTION: o, x and X conversions on regular ints can never
3654 * produce a '-' sign, but can for Python's unbounded ints.
3655 */
3656PyObject*
3657_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3658 char **pbuf, int *plen)
3659{
3660 PyObject *result = NULL;
3661 char *buf;
3662 int i;
3663 int sign; /* 1 if '-', else 0 */
3664 int len; /* number of characters */
3665 int numdigits; /* len == numnondigits + numdigits */
3666 int numnondigits = 0;
3667
3668 switch (type) {
3669 case 'd':
3670 case 'u':
3671 result = val->ob_type->tp_str(val);
3672 break;
3673 case 'o':
3674 result = val->ob_type->tp_as_number->nb_oct(val);
3675 break;
3676 case 'x':
3677 case 'X':
3678 numnondigits = 2;
3679 result = val->ob_type->tp_as_number->nb_hex(val);
3680 break;
3681 default:
3682 assert(!"'type' not in [duoxX]");
3683 }
3684 if (!result)
3685 return NULL;
3686
3687 /* To modify the string in-place, there can only be one reference. */
3688 if (result->ob_refcnt != 1) {
3689 PyErr_BadInternalCall();
3690 return NULL;
3691 }
3692 buf = PyString_AsString(result);
3693 len = PyString_Size(result);
3694 if (buf[len-1] == 'L') {
3695 --len;
3696 buf[len] = '\0';
3697 }
3698 sign = buf[0] == '-';
3699 numnondigits += sign;
3700 numdigits = len - numnondigits;
3701 assert(numdigits > 0);
3702
Tim Petersfff53252001-04-12 18:38:48 +00003703 /* Get rid of base marker unless F_ALT */
3704 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003705 /* Need to skip 0x, 0X or 0. */
3706 int skipped = 0;
3707 switch (type) {
3708 case 'o':
3709 assert(buf[sign] == '0');
3710 /* If 0 is only digit, leave it alone. */
3711 if (numdigits > 1) {
3712 skipped = 1;
3713 --numdigits;
3714 }
3715 break;
3716 case 'x':
3717 case 'X':
3718 assert(buf[sign] == '0');
3719 assert(buf[sign + 1] == 'x');
3720 skipped = 2;
3721 numnondigits -= 2;
3722 break;
3723 }
3724 if (skipped) {
3725 buf += skipped;
3726 len -= skipped;
3727 if (sign)
3728 buf[0] = '-';
3729 }
3730 assert(len == numnondigits + numdigits);
3731 assert(numdigits > 0);
3732 }
3733
3734 /* Fill with leading zeroes to meet minimum width. */
3735 if (prec > numdigits) {
3736 PyObject *r1 = PyString_FromStringAndSize(NULL,
3737 numnondigits + prec);
3738 char *b1;
3739 if (!r1) {
3740 Py_DECREF(result);
3741 return NULL;
3742 }
3743 b1 = PyString_AS_STRING(r1);
3744 for (i = 0; i < numnondigits; ++i)
3745 *b1++ = *buf++;
3746 for (i = 0; i < prec - numdigits; i++)
3747 *b1++ = '0';
3748 for (i = 0; i < numdigits; i++)
3749 *b1++ = *buf++;
3750 *b1 = '\0';
3751 Py_DECREF(result);
3752 result = r1;
3753 buf = PyString_AS_STRING(result);
3754 len = numnondigits + prec;
3755 }
3756
3757 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003758 if (type == 'X') {
3759 /* Need to convert all lower case letters to upper case.
3760 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003761 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003762 if (buf[i] >= 'a' && buf[i] <= 'x')
3763 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003764 }
3765 *pbuf = buf;
3766 *plen = len;
3767 return result;
3768}
3769
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003770static int
Fred Drakeba096332000-07-09 07:04:36 +00003771formatint(char *buf, size_t buflen, int flags,
3772 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003773{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003774 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003775 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3776 + 1 + 1 = 24 */
3777 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003778 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003779 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003780
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003781 x = PyInt_AsLong(v);
3782 if (x == -1 && PyErr_Occurred()) {
3783 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003784 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003785 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003786 if (x < 0 && type == 'u') {
3787 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003788 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003789 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3790 sign = "-";
3791 else
3792 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003793 if (prec < 0)
3794 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003795
3796 if ((flags & F_ALT) &&
3797 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003798 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003799 * of issues that cause pain:
3800 * - when 0 is being converted, the C standard leaves off
3801 * the '0x' or '0X', which is inconsistent with other
3802 * %#x/%#X conversions and inconsistent with Python's
3803 * hex() function
3804 * - there are platforms that violate the standard and
3805 * convert 0 with the '0x' or '0X'
3806 * (Metrowerks, Compaq Tru64)
3807 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003808 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003809 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003810 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003811 * We can achieve the desired consistency by inserting our
3812 * own '0x' or '0X' prefix, and substituting %x/%X in place
3813 * of %#x/%#X.
3814 *
3815 * Note that this is the same approach as used in
3816 * formatint() in unicodeobject.c
3817 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003818 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3819 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003820 }
3821 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003822 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3823 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003824 prec, type);
3825 }
3826
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003827 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3828 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003829 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003830 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003831 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003832 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003833 return -1;
3834 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003835 if (sign[0])
3836 PyOS_snprintf(buf, buflen, fmt, -x);
3837 else
3838 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003839 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003840}
3841
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003842static int
Fred Drakeba096332000-07-09 07:04:36 +00003843formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003844{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003845 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003846 if (PyString_Check(v)) {
3847 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003848 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003849 }
3850 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003851 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003852 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003853 }
3854 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003855 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003856}
3857
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. after a unary operator or sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003867
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003868PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003869PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003870{
3871 char *fmt, *res;
3872 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003873 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003874 PyObject *result, *orig_args;
3875#ifdef Py_USING_UNICODE
3876 PyObject *v, *w;
3877#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003878 PyObject *dict = NULL;
3879 if (format == NULL || !PyString_Check(format) || args == NULL) {
3880 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003881 return NULL;
3882 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003883 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003884 fmt = PyString_AS_STRING(format);
3885 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003886 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003887 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003888 if (result == NULL)
3889 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003890 res = PyString_AsString(result);
3891 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003892 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003893 argidx = 0;
3894 }
3895 else {
3896 arglen = -1;
3897 argidx = -2;
3898 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003899 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3900 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003901 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003902 while (--fmtcnt >= 0) {
3903 if (*fmt != '%') {
3904 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003905 rescnt = fmtcnt + 100;
3906 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003907 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003908 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003909 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003910 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003911 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003912 }
3913 *res++ = *fmt++;
3914 }
3915 else {
3916 /* Got a format specifier */
3917 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003918 int width = -1;
3919 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003920 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003921 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003922 PyObject *v = NULL;
3923 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003924 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003925 int sign;
3926 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003927 char formatbuf[FORMATBUFLEN];
3928 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003929#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003930 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003931 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003932#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003933
Guido van Rossumda9c2711996-12-05 21:58:58 +00003934 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003935 if (*fmt == '(') {
3936 char *keystart;
3937 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003938 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003939 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003940
3941 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003942 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003943 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003944 goto error;
3945 }
3946 ++fmt;
3947 --fmtcnt;
3948 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003949 /* Skip over balanced parentheses */
3950 while (pcount > 0 && --fmtcnt >= 0) {
3951 if (*fmt == ')')
3952 --pcount;
3953 else if (*fmt == '(')
3954 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003955 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003956 }
3957 keylen = fmt - keystart - 1;
3958 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003959 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003960 "incomplete format key");
3961 goto error;
3962 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003963 key = PyString_FromStringAndSize(keystart,
3964 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003965 if (key == NULL)
3966 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003967 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003968 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003969 args_owned = 0;
3970 }
3971 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003972 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003973 if (args == NULL) {
3974 goto error;
3975 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003976 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003977 arglen = -1;
3978 argidx = -2;
3979 }
Guido van Rossume5372401993-03-16 12:15:04 +00003980 while (--fmtcnt >= 0) {
3981 switch (c = *fmt++) {
3982 case '-': flags |= F_LJUST; continue;
3983 case '+': flags |= F_SIGN; continue;
3984 case ' ': flags |= F_BLANK; continue;
3985 case '#': flags |= F_ALT; continue;
3986 case '0': flags |= F_ZERO; continue;
3987 }
3988 break;
3989 }
3990 if (c == '*') {
3991 v = getnextarg(args, arglen, &argidx);
3992 if (v == NULL)
3993 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003994 if (!PyInt_Check(v)) {
3995 PyErr_SetString(PyExc_TypeError,
3996 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003997 goto error;
3998 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003999 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004000 if (width < 0) {
4001 flags |= F_LJUST;
4002 width = -width;
4003 }
Guido van Rossume5372401993-03-16 12:15:04 +00004004 if (--fmtcnt >= 0)
4005 c = *fmt++;
4006 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004007 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004008 width = c - '0';
4009 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004010 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004011 if (!isdigit(c))
4012 break;
4013 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004014 PyErr_SetString(
4015 PyExc_ValueError,
4016 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004017 goto error;
4018 }
4019 width = width*10 + (c - '0');
4020 }
4021 }
4022 if (c == '.') {
4023 prec = 0;
4024 if (--fmtcnt >= 0)
4025 c = *fmt++;
4026 if (c == '*') {
4027 v = getnextarg(args, arglen, &argidx);
4028 if (v == NULL)
4029 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004030 if (!PyInt_Check(v)) {
4031 PyErr_SetString(
4032 PyExc_TypeError,
4033 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004034 goto error;
4035 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004036 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004037 if (prec < 0)
4038 prec = 0;
4039 if (--fmtcnt >= 0)
4040 c = *fmt++;
4041 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004042 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004043 prec = c - '0';
4044 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004045 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004046 if (!isdigit(c))
4047 break;
4048 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004049 PyErr_SetString(
4050 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004051 "prec too big");
4052 goto error;
4053 }
4054 prec = prec*10 + (c - '0');
4055 }
4056 }
4057 } /* prec */
4058 if (fmtcnt >= 0) {
4059 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004060 if (--fmtcnt >= 0)
4061 c = *fmt++;
4062 }
4063 }
4064 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004065 PyErr_SetString(PyExc_ValueError,
4066 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004067 goto error;
4068 }
4069 if (c != '%') {
4070 v = getnextarg(args, arglen, &argidx);
4071 if (v == NULL)
4072 goto error;
4073 }
4074 sign = 0;
4075 fill = ' ';
4076 switch (c) {
4077 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004078 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004079 len = 1;
4080 break;
4081 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004082#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004083 if (PyUnicode_Check(v)) {
4084 fmt = fmt_start;
4085 argidx = argidx_start;
4086 goto unicode;
4087 }
Georg Brandld45014b2005-10-01 17:06:00 +00004088#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004089 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004090#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004091 if (temp != NULL && PyUnicode_Check(temp)) {
4092 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004093 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004094 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004095 goto unicode;
4096 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004097#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004098 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004099 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004100 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004101 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004102 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004103 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004104 if (!PyString_Check(temp)) {
4105 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004106 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004107 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004108 goto error;
4109 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004110 pbuf = PyString_AS_STRING(temp);
4111 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004112 if (prec >= 0 && len > prec)
4113 len = prec;
4114 break;
4115 case 'i':
4116 case 'd':
4117 case 'u':
4118 case 'o':
4119 case 'x':
4120 case 'X':
4121 if (c == 'i')
4122 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004123 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004124 temp = _PyString_FormatLong(v, flags,
4125 prec, c, &pbuf, &len);
4126 if (!temp)
4127 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004128 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004129 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004130 else {
4131 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004132 len = formatint(pbuf,
4133 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004134 flags, prec, c, v);
4135 if (len < 0)
4136 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004137 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004138 }
4139 if (flags & F_ZERO)
4140 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004141 break;
4142 case 'e':
4143 case 'E':
4144 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004145 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004146 case 'g':
4147 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004148 if (c == 'F')
4149 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004150 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004151 len = formatfloat(pbuf, sizeof(formatbuf),
4152 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004153 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004154 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004155 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004156 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004157 fill = '0';
4158 break;
4159 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004160#ifdef Py_USING_UNICODE
4161 if (PyUnicode_Check(v)) {
4162 fmt = fmt_start;
4163 argidx = argidx_start;
4164 goto unicode;
4165 }
4166#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004167 pbuf = formatbuf;
4168 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004169 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004170 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004171 break;
4172 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004173 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004174 "unsupported format character '%c' (0x%x) "
4175 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004176 c, c,
4177 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004178 goto error;
4179 }
4180 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004181 if (*pbuf == '-' || *pbuf == '+') {
4182 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004183 len--;
4184 }
4185 else if (flags & F_SIGN)
4186 sign = '+';
4187 else if (flags & F_BLANK)
4188 sign = ' ';
4189 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004190 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004191 }
4192 if (width < len)
4193 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004194 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004195 reslen -= rescnt;
4196 rescnt = width + fmtcnt + 100;
4197 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004198 if (reslen < 0) {
4199 Py_DECREF(result);
4200 return PyErr_NoMemory();
4201 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004202 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004203 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004204 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004205 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004206 }
4207 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004208 if (fill != ' ')
4209 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004210 rescnt--;
4211 if (width > len)
4212 width--;
4213 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004214 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4215 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004216 assert(pbuf[1] == c);
4217 if (fill != ' ') {
4218 *res++ = *pbuf++;
4219 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004220 }
Tim Petersfff53252001-04-12 18:38:48 +00004221 rescnt -= 2;
4222 width -= 2;
4223 if (width < 0)
4224 width = 0;
4225 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004226 }
4227 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004228 do {
4229 --rescnt;
4230 *res++ = fill;
4231 } while (--width > len);
4232 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004233 if (fill == ' ') {
4234 if (sign)
4235 *res++ = sign;
4236 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004237 (c == 'x' || c == 'X')) {
4238 assert(pbuf[0] == '0');
4239 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004240 *res++ = *pbuf++;
4241 *res++ = *pbuf++;
4242 }
4243 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004244 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004245 res += len;
4246 rescnt -= len;
4247 while (--width >= len) {
4248 --rescnt;
4249 *res++ = ' ';
4250 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004251 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004252 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004253 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004254 goto error;
4255 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004256 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004257 } /* '%' */
4258 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004259 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004260 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004261 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004262 goto error;
4263 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004264 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004265 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004266 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004267 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004268 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004269
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004270#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004271 unicode:
4272 if (args_owned) {
4273 Py_DECREF(args);
4274 args_owned = 0;
4275 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004276 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004277 if (PyTuple_Check(orig_args) && argidx > 0) {
4278 PyObject *v;
4279 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4280 v = PyTuple_New(n);
4281 if (v == NULL)
4282 goto error;
4283 while (--n >= 0) {
4284 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4285 Py_INCREF(w);
4286 PyTuple_SET_ITEM(v, n, w);
4287 }
4288 args = v;
4289 } else {
4290 Py_INCREF(orig_args);
4291 args = orig_args;
4292 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004293 args_owned = 1;
4294 /* Take what we have of the result and let the Unicode formatting
4295 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004296 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004297 if (_PyString_Resize(&result, rescnt))
4298 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004299 fmtcnt = PyString_GET_SIZE(format) - \
4300 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004301 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4302 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004303 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004304 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004305 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004306 if (v == NULL)
4307 goto error;
4308 /* Paste what we have (result) to what the Unicode formatting
4309 function returned (v) and return the result (or error) */
4310 w = PyUnicode_Concat(result, v);
4311 Py_DECREF(result);
4312 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004313 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004314 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004315#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004316
Guido van Rossume5372401993-03-16 12:15:04 +00004317 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004318 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004319 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004320 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004321 }
Guido van Rossume5372401993-03-16 12:15:04 +00004322 return NULL;
4323}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004324
Guido van Rossum2a61e741997-01-18 07:55:05 +00004325void
Fred Drakeba096332000-07-09 07:04:36 +00004326PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004327{
4328 register PyStringObject *s = (PyStringObject *)(*p);
4329 PyObject *t;
4330 if (s == NULL || !PyString_Check(s))
4331 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004332 /* If it's a string subclass, we don't really know what putting
4333 it in the interned dict might do. */
4334 if (!PyString_CheckExact(s))
4335 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004336 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004337 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004338 if (interned == NULL) {
4339 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004340 if (interned == NULL) {
4341 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004342 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004343 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004344 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004345 t = PyDict_GetItem(interned, (PyObject *)s);
4346 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004347 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004348 Py_DECREF(*p);
4349 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004350 return;
4351 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004352
Armin Rigo79f7ad22004-08-07 19:27:39 +00004353 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004354 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004355 return;
4356 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004357 /* The two references in interned are not counted by refcnt.
4358 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004359 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004360 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004361}
4362
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004363void
4364PyString_InternImmortal(PyObject **p)
4365{
4366 PyString_InternInPlace(p);
4367 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4368 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4369 Py_INCREF(*p);
4370 }
4371}
4372
Guido van Rossum2a61e741997-01-18 07:55:05 +00004373
4374PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004375PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004376{
4377 PyObject *s = PyString_FromString(cp);
4378 if (s == NULL)
4379 return NULL;
4380 PyString_InternInPlace(&s);
4381 return s;
4382}
4383
Guido van Rossum8cf04761997-08-02 02:57:45 +00004384void
Fred Drakeba096332000-07-09 07:04:36 +00004385PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004386{
4387 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004388 for (i = 0; i < UCHAR_MAX + 1; i++) {
4389 Py_XDECREF(characters[i]);
4390 characters[i] = NULL;
4391 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004392 Py_XDECREF(nullstring);
4393 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004394}
Barry Warsawa903ad982001-02-23 16:40:48 +00004395
Barry Warsawa903ad982001-02-23 16:40:48 +00004396void _Py_ReleaseInternedStrings(void)
4397{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004398 PyObject *keys;
4399 PyStringObject *s;
4400 int i, n;
4401
4402 if (interned == NULL || !PyDict_Check(interned))
4403 return;
4404 keys = PyDict_Keys(interned);
4405 if (keys == NULL || !PyList_Check(keys)) {
4406 PyErr_Clear();
4407 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004408 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004409
4410 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4411 detector, interned strings are not forcibly deallocated; rather, we
4412 give them their stolen references back, and then clear and DECREF
4413 the interned dict. */
4414
4415 fprintf(stderr, "releasing interned strings\n");
4416 n = PyList_GET_SIZE(keys);
4417 for (i = 0; i < n; i++) {
4418 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4419 switch (s->ob_sstate) {
4420 case SSTATE_NOT_INTERNED:
4421 /* XXX Shouldn't happen */
4422 break;
4423 case SSTATE_INTERNED_IMMORTAL:
4424 s->ob_refcnt += 1;
4425 break;
4426 case SSTATE_INTERNED_MORTAL:
4427 s->ob_refcnt += 2;
4428 break;
4429 default:
4430 Py_FatalError("Inconsistent interned string state.");
4431 }
4432 s->ob_sstate = SSTATE_NOT_INTERNED;
4433 }
4434 Py_DECREF(keys);
4435 PyDict_Clear(interned);
4436 Py_DECREF(interned);
4437 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004438}