blob: 176e0d2b611872c1ccc00e39ba1f7c497726a56d [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is either NULL or
   else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000157 int n = 0;
158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000184
Barry Warsawdadace02001-08-24 18:32:06 +0000185 switch (*f) {
186 case 'c':
187 (void)va_arg(count, int);
188 /* fall through... */
189 case '%':
190 n++;
191 break;
192 case 'd': case 'i': case 'x':
193 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000194 /* 20 bytes is enough to hold a 64-bit
195 integer. Decimal takes the most space.
196 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000197 n += 20;
198 break;
199 case 's':
200 s = va_arg(count, char*);
201 n += strlen(s);
202 break;
203 case 'p':
204 (void) va_arg(count, int);
205 /* maximum 64-bit pointer representation:
206 * 0xffffffffffffffff
207 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000208 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000209 */
210 n += 19;
211 break;
212 default:
213 /* if we stumble upon an unknown
214 formatting code, copy the rest of
215 the format string to the output
216 string. (we cannot just skip the
217 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000218 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 n += strlen(p);
220 goto expand;
221 }
222 } else
223 n++;
224 }
225 expand:
226 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000227 /* Since we've analyzed how much space we need for the worst case,
228 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000229 string = PyString_FromStringAndSize(NULL, n);
230 if (!string)
231 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000232
Barry Warsawdadace02001-08-24 18:32:06 +0000233 s = PyString_AsString(string);
234
235 for (f = format; *f; f++) {
236 if (*f == '%') {
237 const char* p = f++;
238 int i, longflag = 0;
239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
242 while (isdigit(Py_CHARMASK(*f)))
243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 }
250 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
251 f++;
252 /* handle the long flag, but only for %ld. others
253 can be added when necessary. */
254 if (*f == 'l' && *(f+1) == 'd') {
255 longflag = 1;
256 ++f;
257 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000258
Barry Warsawdadace02001-08-24 18:32:06 +0000259 switch (*f) {
260 case 'c':
261 *s++ = va_arg(vargs, int);
262 break;
263 case 'd':
264 if (longflag)
265 sprintf(s, "%ld", va_arg(vargs, long));
266 else
267 sprintf(s, "%d", va_arg(vargs, int));
268 s += strlen(s);
269 break;
270 case 'i':
271 sprintf(s, "%i", va_arg(vargs, int));
272 s += strlen(s);
273 break;
274 case 'x':
275 sprintf(s, "%x", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 's':
279 p = va_arg(vargs, char*);
280 i = strlen(p);
281 if (n > 0 && i > n)
282 i = n;
283 memcpy(s, p, i);
284 s += i;
285 break;
286 case 'p':
287 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000288 /* %p is ill-defined: ensure leading 0x. */
289 if (s[1] == 'X')
290 s[1] = 'x';
291 else if (s[1] != 'x') {
292 memmove(s+2, s, strlen(s)+1);
293 s[0] = '0';
294 s[1] = 'x';
295 }
Barry Warsawdadace02001-08-24 18:32:06 +0000296 s += strlen(s);
297 break;
298 case '%':
299 *s++ = '%';
300 break;
301 default:
302 strcpy(s, p);
303 s += strlen(s);
304 goto end;
305 }
306 } else
307 *s++ = *f;
308 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000309
Barry Warsawdadace02001-08-24 18:32:06 +0000310 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000311 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000312 return string;
313}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000316PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000317{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000318 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319 va_list vargs;
320
321#ifdef HAVE_STDARG_PROTOTYPES
322 va_start(vargs, format);
323#else
324 va_start(vargs);
325#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000326 ret = PyString_FromFormatV(format, vargs);
327 va_end(vargs);
328 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000329}
330
331
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000332PyObject *PyString_Decode(const char *s,
333 int size,
334 const char *encoding,
335 const char *errors)
336{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000337 PyObject *v, *str;
338
339 str = PyString_FromStringAndSize(s, size);
340 if (str == NULL)
341 return NULL;
342 v = PyString_AsDecodedString(str, encoding, errors);
343 Py_DECREF(str);
344 return v;
345}
346
347PyObject *PyString_AsDecodedObject(PyObject *str,
348 const char *encoding,
349 const char *errors)
350{
351 PyObject *v;
352
353 if (!PyString_Check(str)) {
354 PyErr_BadArgument();
355 goto onError;
356 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000358 if (encoding == NULL) {
359#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000361#else
362 PyErr_SetString(PyExc_ValueError, "no encoding specified");
363 goto onError;
364#endif
365 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000366
367 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000368 v = PyCodec_Decode(str, encoding, errors);
369 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000370 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000371
372 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000373
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000374 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000375 return NULL;
376}
377
378PyObject *PyString_AsDecodedString(PyObject *str,
379 const char *encoding,
380 const char *errors)
381{
382 PyObject *v;
383
384 v = PyString_AsDecodedObject(str, encoding, errors);
385 if (v == NULL)
386 goto onError;
387
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000388#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389 /* Convert Unicode to a string using the default encoding */
390 if (PyUnicode_Check(v)) {
391 PyObject *temp = v;
392 v = PyUnicode_AsEncodedString(v, NULL, NULL);
393 Py_DECREF(temp);
394 if (v == NULL)
395 goto onError;
396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000397#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 if (!PyString_Check(v)) {
399 PyErr_Format(PyExc_TypeError,
400 "decoder did not return a string object (type=%.400s)",
401 v->ob_type->tp_name);
402 Py_DECREF(v);
403 goto onError;
404 }
405
406 return v;
407
408 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 return NULL;
410}
411
412PyObject *PyString_Encode(const char *s,
413 int size,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000418
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000419 str = PyString_FromStringAndSize(s, size);
420 if (str == NULL)
421 return NULL;
422 v = PyString_AsEncodedString(str, encoding, errors);
423 Py_DECREF(str);
424 return v;
425}
426
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000428 const char *encoding,
429 const char *errors)
430{
431 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000432
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 if (!PyString_Check(str)) {
434 PyErr_BadArgument();
435 goto onError;
436 }
437
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000438 if (encoding == NULL) {
439#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000440 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000441#else
442 PyErr_SetString(PyExc_ValueError, "no encoding specified");
443 goto onError;
444#endif
445 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446
447 /* Encode via the codec registry */
448 v = PyCodec_Encode(str, encoding, errors);
449 if (v == NULL)
450 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451
452 return v;
453
454 onError:
455 return NULL;
456}
457
458PyObject *PyString_AsEncodedString(PyObject *str,
459 const char *encoding,
460 const char *errors)
461{
462 PyObject *v;
463
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000464 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000465 if (v == NULL)
466 goto onError;
467
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000468#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469 /* Convert Unicode to a string using the default encoding */
470 if (PyUnicode_Check(v)) {
471 PyObject *temp = v;
472 v = PyUnicode_AsEncodedString(v, NULL, NULL);
473 Py_DECREF(temp);
474 if (v == NULL)
475 goto onError;
476 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000477#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 if (!PyString_Check(v)) {
479 PyErr_Format(PyExc_TypeError,
480 "encoder did not return a string object (type=%.400s)",
481 v->ob_type->tp_name);
482 Py_DECREF(v);
483 goto onError;
484 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000485
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000486 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000487
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000488 onError:
489 return NULL;
490}
491
Guido van Rossum234f9421993-06-17 12:35:49 +0000492static void
Fred Drakeba096332000-07-09 07:04:36 +0000493string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000494{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000495 switch (PyString_CHECK_INTERNED(op)) {
496 case SSTATE_NOT_INTERNED:
497 break;
498
499 case SSTATE_INTERNED_MORTAL:
500 /* revive dead object temporarily for DelItem */
501 op->ob_refcnt = 3;
502 if (PyDict_DelItem(interned, op) != 0)
503 Py_FatalError(
504 "deletion of interned string failed");
505 break;
506
507 case SSTATE_INTERNED_IMMORTAL:
508 Py_FatalError("Immortal interned string died.");
509
510 default:
511 Py_FatalError("Inconsistent interned string state.");
512 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000513 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000514}
515
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000516/* Unescape a backslash-escaped string. If unicode is non-zero,
517 the string is a u-literal. If recode_encoding is non-zero,
518 the string is UTF-8 encoded and should be re-encoded in the
519 specified encoding. */
520
521PyObject *PyString_DecodeEscape(const char *s,
522 int len,
523 const char *errors,
524 int unicode,
525 const char *recode_encoding)
526{
527 int c;
528 char *p, *buf;
529 const char *end;
530 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000531 int newlen = recode_encoding ? 4*len:len;
532 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000533 if (v == NULL)
534 return NULL;
535 p = buf = PyString_AsString(v);
536 end = s + len;
537 while (s < end) {
538 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000539 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540#ifdef Py_USING_UNICODE
541 if (recode_encoding && (*s & 0x80)) {
542 PyObject *u, *w;
543 char *r;
544 const char* t;
545 int rn;
546 t = s;
547 /* Decode non-ASCII bytes as UTF-8. */
548 while (t < end && (*t & 0x80)) t++;
549 u = PyUnicode_DecodeUTF8(s, t - s, errors);
550 if(!u) goto failed;
551
552 /* Recode them in target encoding. */
553 w = PyUnicode_AsEncodedString(
554 u, recode_encoding, errors);
555 Py_DECREF(u);
556 if (!w) goto failed;
557
558 /* Append bytes to output buffer. */
559 r = PyString_AsString(w);
560 rn = PyString_Size(w);
561 memcpy(p, r, rn);
562 p += rn;
563 Py_DECREF(w);
564 s = t;
565 } else {
566 *p++ = *s++;
567 }
568#else
569 *p++ = *s++;
570#endif
571 continue;
572 }
573 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000574 if (s==end) {
575 PyErr_SetString(PyExc_ValueError,
576 "Trailing \\ in string");
577 goto failed;
578 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000579 switch (*s++) {
580 /* XXX This assumes ASCII! */
581 case '\n': break;
582 case '\\': *p++ = '\\'; break;
583 case '\'': *p++ = '\''; break;
584 case '\"': *p++ = '\"'; break;
585 case 'b': *p++ = '\b'; break;
586 case 'f': *p++ = '\014'; break; /* FF */
587 case 't': *p++ = '\t'; break;
588 case 'n': *p++ = '\n'; break;
589 case 'r': *p++ = '\r'; break;
590 case 'v': *p++ = '\013'; break; /* VT */
591 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
592 case '0': case '1': case '2': case '3':
593 case '4': case '5': case '6': case '7':
594 c = s[-1] - '0';
595 if ('0' <= *s && *s <= '7') {
596 c = (c<<3) + *s++ - '0';
597 if ('0' <= *s && *s <= '7')
598 c = (c<<3) + *s++ - '0';
599 }
600 *p++ = c;
601 break;
602 case 'x':
603 if (isxdigit(Py_CHARMASK(s[0]))
604 && isxdigit(Py_CHARMASK(s[1]))) {
605 unsigned int x = 0;
606 c = Py_CHARMASK(*s);
607 s++;
608 if (isdigit(c))
609 x = c - '0';
610 else if (islower(c))
611 x = 10 + c - 'a';
612 else
613 x = 10 + c - 'A';
614 x = x << 4;
615 c = Py_CHARMASK(*s);
616 s++;
617 if (isdigit(c))
618 x += c - '0';
619 else if (islower(c))
620 x += 10 + c - 'a';
621 else
622 x += 10 + c - 'A';
623 *p++ = x;
624 break;
625 }
626 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000627 PyErr_SetString(PyExc_ValueError,
628 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000629 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000630 }
631 if (strcmp(errors, "replace") == 0) {
632 *p++ = '?';
633 } else if (strcmp(errors, "ignore") == 0)
634 /* do nothing */;
635 else {
636 PyErr_Format(PyExc_ValueError,
637 "decoding error; "
638 "unknown error handling code: %.400s",
639 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000640 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000641 }
642#ifndef Py_USING_UNICODE
643 case 'u':
644 case 'U':
645 case 'N':
646 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000647 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 "Unicode escapes not legal "
649 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000650 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000651 }
652#endif
653 default:
654 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000655 s--;
656 goto non_esc; /* an arbitry number of unescaped
657 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000658 }
659 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000660 if (p-buf < newlen)
661 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 return v;
663 failed:
664 Py_DECREF(v);
665 return NULL;
666}
667
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000668static int
669string_getsize(register PyObject *op)
670{
671 char *s;
672 int len;
673 if (PyString_AsStringAndSize(op, &s, &len))
674 return -1;
675 return len;
676}
677
678static /*const*/ char *
679string_getbuffer(register PyObject *op)
680{
681 char *s;
682 int len;
683 if (PyString_AsStringAndSize(op, &s, &len))
684 return NULL;
685 return s;
686}
687
Guido van Rossumd7047b31995-01-02 19:07:15 +0000688int
Fred Drakeba096332000-07-09 07:04:36 +0000689PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000690{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000691 if (!PyString_Check(op))
692 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000694}
695
696/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000697PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000699 if (!PyString_Check(op))
700 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702}
703
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704int
705PyString_AsStringAndSize(register PyObject *obj,
706 register char **s,
707 register int *len)
708{
709 if (s == NULL) {
710 PyErr_BadInternalCall();
711 return -1;
712 }
713
714 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000715#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (PyUnicode_Check(obj)) {
717 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
718 if (obj == NULL)
719 return -1;
720 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000721 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000722#endif
723 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 PyErr_Format(PyExc_TypeError,
725 "expected string or Unicode object, "
726 "%.200s found", obj->ob_type->tp_name);
727 return -1;
728 }
729 }
730
731 *s = PyString_AS_STRING(obj);
732 if (len != NULL)
733 *len = PyString_GET_SIZE(obj);
734 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
735 PyErr_SetString(PyExc_TypeError,
736 "expected string without null bytes");
737 return -1;
738 }
739 return 0;
740}
741
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742/* Methods */
743
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000744static int
Fred Drakeba096332000-07-09 07:04:36 +0000745string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746{
747 int i;
748 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000749 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000750
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000751 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000752 if (! PyString_CheckExact(op)) {
753 int ret;
754 /* A str subclass may have its own __str__ method. */
755 op = (PyStringObject *) PyObject_Str((PyObject *)op);
756 if (op == NULL)
757 return -1;
758 ret = string_print(op, fp, flags);
759 Py_DECREF(op);
760 return ret;
761 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000762 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000763#ifdef __VMS
764 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
765#else
766 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
767#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000768 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000770
Thomas Wouters7e474022000-07-16 12:04:32 +0000771 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000772 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000773 if (memchr(op->ob_sval, '\'', op->ob_size) &&
774 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775 quote = '"';
776
777 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000778 for (i = 0; i < op->ob_size; i++) {
779 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000781 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000782 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000783 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000784 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000785 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000786 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000787 fprintf(fp, "\\r");
788 else if (c < ' ' || c >= 0x7f)
789 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000790 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000791 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000794 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000795}
796
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000797PyObject *
798PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000800 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000801 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000802 PyObject *v;
803 if (newsize > INT_MAX) {
804 PyErr_SetString(PyExc_OverflowError,
805 "string is too large to make repr");
806 }
807 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000809 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000810 }
811 else {
812 register int i;
813 register char c;
814 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815 int quote;
816
Thomas Wouters7e474022000-07-16 12:04:32 +0000817 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000818 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000819 if (smartquotes &&
820 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000821 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000822 quote = '"';
823
Tim Peters9161c8b2001-12-03 01:55:38 +0000824 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000825 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000827 /* There's at least enough room for a hex escape
828 and a closing quote. */
829 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000833 else if (c == '\t')
834 *p++ = '\\', *p++ = 't';
835 else if (c == '\n')
836 *p++ = '\\', *p++ = 'n';
837 else if (c == '\r')
838 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 else if (c < ' ' || c >= 0x7f) {
840 /* For performance, we don't want to call
841 PyOS_snprintf here (extra layers of
842 function call). */
843 sprintf(p, "\\x%02x", c & 0xff);
844 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000845 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 else
847 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000849 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000850 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000852 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000853 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000854 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856}
857
Guido van Rossum189f1df2001-05-01 16:51:53 +0000858static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000859string_repr(PyObject *op)
860{
861 return PyString_Repr(op, 1);
862}
863
864static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000865string_str(PyObject *s)
866{
Tim Petersc9933152001-10-16 20:18:24 +0000867 assert(PyString_Check(s));
868 if (PyString_CheckExact(s)) {
869 Py_INCREF(s);
870 return s;
871 }
872 else {
873 /* Subtype -- return genuine string with the same value. */
874 PyStringObject *t = (PyStringObject *) s;
875 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
876 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000877}
878
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879static int
Fred Drakeba096332000-07-09 07:04:36 +0000880string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881{
882 return a->ob_size;
883}
884
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000885static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000886string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887{
888 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000889 register PyStringObject *op;
890 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000891#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000892 if (PyUnicode_Check(bb))
893 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000894#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000895 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000896 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000897 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898 return NULL;
899 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000900#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000902 if ((a->ob_size == 0 || b->ob_size == 0) &&
903 PyString_CheckExact(a) && PyString_CheckExact(b)) {
904 if (a->ob_size == 0) {
905 Py_INCREF(bb);
906 return bb;
907 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 Py_INCREF(a);
909 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000910 }
911 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000912 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000913 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000914 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000915 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000916 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000917 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000918 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000919 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
920 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
921 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923#undef b
924}
925
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000926static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000927string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000928{
929 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000930 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000931 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000933 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934 if (n < 0)
935 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000936 /* watch out for overflows: the size can overflow int,
937 * and the # of bytes needed can overflow size_t
938 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000940 if (n && size / n != a->ob_size) {
941 PyErr_SetString(PyExc_OverflowError,
942 "repeated string is too long");
943 return NULL;
944 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000945 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 Py_INCREF(a);
947 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 }
Tim Peterse7c05322004-06-27 17:24:49 +0000949 nbytes = (size_t)size;
950 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000951 PyErr_SetString(PyExc_OverflowError,
952 "repeated string is too long");
953 return NULL;
954 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000956 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000957 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000959 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000960 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000961 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000962 op->ob_sval[size] = '\0';
963 if (a->ob_size == 1 && n > 0) {
964 memset(op->ob_sval, a->ob_sval[0] , n);
965 return (PyObject *) op;
966 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000967 i = 0;
968 if (i < size) {
969 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
970 i = (int) a->ob_size;
971 }
972 while (i < size) {
973 j = (i <= size-i) ? i : size-i;
974 memcpy(op->ob_sval+i, op->ob_sval, j);
975 i += j;
976 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978}
979
980/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
981
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000983string_slice(register PyStringObject *a, register int i, register int j)
984 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985{
986 if (i < 0)
987 i = 0;
988 if (j < 0)
989 j = 0; /* Avoid signed/unsigned bug in next line */
990 if (j > a->ob_size)
991 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000992 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
993 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 Py_INCREF(a);
995 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 }
997 if (j < i)
998 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001000}
1001
Guido van Rossum9284a572000-03-07 15:53:43 +00001002static int
Fred Drakeba096332000-07-09 07:04:36 +00001003string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001004{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001005 char *s = PyString_AS_STRING(a);
1006 const char *sub = PyString_AS_STRING(el);
1007 char *last;
1008 int len_sub = PyString_GET_SIZE(el);
1009 int shortsub;
1010 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001011
1012 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001013#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014 if (PyUnicode_Check(el))
1015 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001016#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 if (!PyString_Check(el)) {
1018 PyErr_SetString(PyExc_TypeError,
1019 "'in <string>' requires string as left operand");
1020 return -1;
1021 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001022 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001023
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001024 if (len_sub == 0)
1025 return 1;
1026 /* last points to one char beyond the start of the rightmost
1027 substring. When s<last, there is still room for a possible match
1028 and s[0] through s[len_sub-1] will be in bounds.
1029 shortsub is len_sub minus the last character which is checked
1030 separately just before the memcmp(). That check helps prevent
1031 false starts and saves the setup time for memcmp().
1032 */
1033 firstchar = sub[0];
1034 shortsub = len_sub - 1;
1035 lastchar = sub[shortsub];
1036 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1037 while (s < last) {
1038 s = memchr(s, firstchar, last-s);
1039 if (s == NULL)
1040 return 0;
1041 assert(s < last);
1042 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001043 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001044 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001045 }
1046 return 0;
1047}
1048
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001050string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001052 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001053 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001054 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001056 return NULL;
1057 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001058 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001059 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001060 if (v == NULL)
1061 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001062 else {
1063#ifdef COUNT_ALLOCS
1064 one_strings++;
1065#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001066 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001067 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001068 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001069}
1070
Martin v. Löwiscd353062001-05-24 16:56:35 +00001071static PyObject*
1072string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074 int c;
1075 int len_a, len_b;
1076 int min_len;
1077 PyObject *result;
1078
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001079 /* Make sure both arguments are strings. */
1080 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001081 result = Py_NotImplemented;
1082 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001083 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001084 if (a == b) {
1085 switch (op) {
1086 case Py_EQ:case Py_LE:case Py_GE:
1087 result = Py_True;
1088 goto out;
1089 case Py_NE:case Py_LT:case Py_GT:
1090 result = Py_False;
1091 goto out;
1092 }
1093 }
1094 if (op == Py_EQ) {
1095 /* Supporting Py_NE here as well does not save
1096 much time, since Py_NE is rarely used. */
1097 if (a->ob_size == b->ob_size
1098 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001099 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 a->ob_size) == 0)) {
1101 result = Py_True;
1102 } else {
1103 result = Py_False;
1104 }
1105 goto out;
1106 }
1107 len_a = a->ob_size; len_b = b->ob_size;
1108 min_len = (len_a < len_b) ? len_a : len_b;
1109 if (min_len > 0) {
1110 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1111 if (c==0)
1112 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1113 }else
1114 c = 0;
1115 if (c == 0)
1116 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1117 switch (op) {
1118 case Py_LT: c = c < 0; break;
1119 case Py_LE: c = c <= 0; break;
1120 case Py_EQ: assert(0); break; /* unreachable */
1121 case Py_NE: c = c != 0; break;
1122 case Py_GT: c = c > 0; break;
1123 case Py_GE: c = c >= 0; break;
1124 default:
1125 result = Py_NotImplemented;
1126 goto out;
1127 }
1128 result = c ? Py_True : Py_False;
1129 out:
1130 Py_INCREF(result);
1131 return result;
1132}
1133
1134int
1135_PyString_Eq(PyObject *o1, PyObject *o2)
1136{
1137 PyStringObject *a, *b;
1138 a = (PyStringObject*)o1;
1139 b = (PyStringObject*)o2;
1140 return a->ob_size == b->ob_size
1141 && *a->ob_sval == *b->ob_sval
1142 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001143}
1144
Guido van Rossum9bfef441993-03-29 10:43:31 +00001145static long
Fred Drakeba096332000-07-09 07:04:36 +00001146string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001147{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 register int len;
1149 register unsigned char *p;
1150 register long x;
1151
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001152 if (a->ob_shash != -1)
1153 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001154 len = a->ob_size;
1155 p = (unsigned char *) a->ob_sval;
1156 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001157 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001158 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001159 x ^= a->ob_size;
1160 if (x == -1)
1161 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001162 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001163 return x;
1164}
1165
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001166static PyObject*
1167string_subscript(PyStringObject* self, PyObject* item)
1168{
1169 if (PyInt_Check(item)) {
1170 long i = PyInt_AS_LONG(item);
1171 if (i < 0)
1172 i += PyString_GET_SIZE(self);
1173 return string_item(self,i);
1174 }
1175 else if (PyLong_Check(item)) {
1176 long i = PyLong_AsLong(item);
1177 if (i == -1 && PyErr_Occurred())
1178 return NULL;
1179 if (i < 0)
1180 i += PyString_GET_SIZE(self);
1181 return string_item(self,i);
1182 }
1183 else if (PySlice_Check(item)) {
1184 int start, stop, step, slicelength, cur, i;
1185 char* source_buf;
1186 char* result_buf;
1187 PyObject* result;
1188
1189 if (PySlice_GetIndicesEx((PySliceObject*)item,
1190 PyString_GET_SIZE(self),
1191 &start, &stop, &step, &slicelength) < 0) {
1192 return NULL;
1193 }
1194
1195 if (slicelength <= 0) {
1196 return PyString_FromStringAndSize("", 0);
1197 }
1198 else {
1199 source_buf = PyString_AsString((PyObject*)self);
1200 result_buf = PyMem_Malloc(slicelength);
1201
1202 for (cur = start, i = 0; i < slicelength;
1203 cur += step, i++) {
1204 result_buf[i] = source_buf[cur];
1205 }
1206
1207 result = PyString_FromStringAndSize(result_buf,
1208 slicelength);
1209 PyMem_Free(result_buf);
1210 return result;
1211 }
1212 }
1213 else {
1214 PyErr_SetString(PyExc_TypeError,
1215 "string indices must be integers");
1216 return NULL;
1217 }
1218}
1219
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001220static int
Fred Drakeba096332000-07-09 07:04:36 +00001221string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001222{
1223 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001224 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001225 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001226 return -1;
1227 }
1228 *ptr = (void *)self->ob_sval;
1229 return self->ob_size;
1230}
1231
1232static int
Fred Drakeba096332000-07-09 07:04:36 +00001233string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001234{
Guido van Rossum045e6881997-09-08 18:30:11 +00001235 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001236 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001237 return -1;
1238}
1239
1240static int
Fred Drakeba096332000-07-09 07:04:36 +00001241string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001242{
1243 if ( lenp )
1244 *lenp = self->ob_size;
1245 return 1;
1246}
1247
Guido van Rossum1db70701998-10-08 02:18:52 +00001248static int
Fred Drakeba096332000-07-09 07:04:36 +00001249string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001250{
1251 if ( index != 0 ) {
1252 PyErr_SetString(PyExc_SystemError,
1253 "accessing non-existent string segment");
1254 return -1;
1255 }
1256 *ptr = self->ob_sval;
1257 return self->ob_size;
1258}
1259
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001260static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001261 (inquiry)string_length, /*sq_length*/
1262 (binaryfunc)string_concat, /*sq_concat*/
1263 (intargfunc)string_repeat, /*sq_repeat*/
1264 (intargfunc)string_item, /*sq_item*/
1265 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001266 0, /*sq_ass_item*/
1267 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001268 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001269};
1270
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001271static PyMappingMethods string_as_mapping = {
1272 (inquiry)string_length,
1273 (binaryfunc)string_subscript,
1274 0,
1275};
1276
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001277static PyBufferProcs string_as_buffer = {
1278 (getreadbufferproc)string_buffer_getreadbuf,
1279 (getwritebufferproc)string_buffer_getwritebuf,
1280 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001281 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001282};
1283
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001284
1285
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",
	"|O:rstrip",
	"|O:strip"
};

/* Method name for selector i: the format string with its leading "|O:"
   (three characters) skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001294
/* Append the substring data[left:right] to `list'.

   Expects the expanding function to declare `PyObject *str' and
   `PyObject *list' and to provide an `onError' label, which is jumped to
   when creating the substring or appending it fails.  The list keeps its
   own reference; the temporary one is dropped either way.

   Wrapped in do { } while (0) so the macro expands to exactly one
   statement and is safe in an unbraced if/else; invoke it with a
   trailing semicolon. */
#define SPLIT_APPEND(data, left, right)					\
	do {								\
		str = PyString_FromStringAndSize((data) + (left),	\
						 (right) - (left));	\
		if (str == NULL)					\
			goto onError;					\
		if (PyList_Append(list, str)) {				\
			Py_DECREF(str);					\
			goto onError;					\
		}							\
		Py_DECREF(str);						\
	} while (0)
1306
/* Insert the substring data[left:right] at the front of `list'.

   Expects the expanding function to declare `PyObject *str' and
   `PyObject *list' and to provide an `onError' label, which is jumped to
   when creating the substring or inserting it fails.  The list keeps its
   own reference; the temporary one is dropped either way.

   Wrapped in do { } while (0) so the macro expands to exactly one
   statement and is safe in an unbraced if/else; invoke it with a
   trailing semicolon. */
#define SPLIT_INSERT(data, left, right)					\
	do {								\
		str = PyString_FromStringAndSize((data) + (left),	\
						 (right) - (left));	\
		if (str == NULL)					\
			goto onError;					\
		if (PyList_Insert(list, 0, str)) {			\
			Py_DECREF(str);					\
			goto onError;					\
		}							\
		Py_DECREF(str);						\
	} while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318
1319static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001320split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001322 int i, j;
1323 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324 PyObject *list = PyList_New(0);
1325
1326 if (list == NULL)
1327 return NULL;
1328
Guido van Rossum4c08d552000-03-10 22:55:18 +00001329 for (i = j = 0; i < len; ) {
1330 while (i < len && isspace(Py_CHARMASK(s[i])))
1331 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 while (i < len && !isspace(Py_CHARMASK(s[i])))
1334 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 if (maxsplit-- <= 0)
1337 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001338 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001339 while (i < len && isspace(Py_CHARMASK(s[i])))
1340 i++;
1341 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342 }
1343 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001345 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001346 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001348 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 Py_DECREF(list);
1350 return NULL;
1351}
1352
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001353static PyObject *
1354split_char(const char *s, int len, char ch, int maxcount)
1355{
1356 register int i, j;
1357 PyObject *str;
1358 PyObject *list = PyList_New(0);
1359
1360 if (list == NULL)
1361 return NULL;
1362
1363 for (i = j = 0; i < len; ) {
1364 if (s[i] == ch) {
1365 if (maxcount-- <= 0)
1366 break;
1367 SPLIT_APPEND(s, j, i);
1368 i = j = i + 1;
1369 } else
1370 i++;
1371 }
1372 if (j <= len) {
1373 SPLIT_APPEND(s, j, len);
1374 }
1375 return list;
1376
1377 onError:
1378 Py_DECREF(list);
1379 return NULL;
1380}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001381
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001382PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383"S.split([sep [,maxsplit]]) -> list of strings\n\
1384\n\
1385Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001387splits are done. If sep is not specified or is None, any\n\
1388whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389
1390static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001391string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001392{
1393 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001394 int maxsplit = -1;
1395 const char *s = PyString_AS_STRING(self), *sub;
1396 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001397
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400 if (maxsplit < 0)
1401 maxsplit = INT_MAX;
1402 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001404 if (PyString_Check(subobj)) {
1405 sub = PyString_AS_STRING(subobj);
1406 n = PyString_GET_SIZE(subobj);
1407 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001408#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001409 else if (PyUnicode_Check(subobj))
1410 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001411#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001412 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1413 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001414
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 if (n == 0) {
1416 PyErr_SetString(PyExc_ValueError, "empty separator");
1417 return NULL;
1418 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001419 else if (n == 1)
1420 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421
1422 list = PyList_New(0);
1423 if (list == NULL)
1424 return NULL;
1425
1426 i = j = 0;
1427 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001428 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001429 if (maxsplit-- <= 0)
1430 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1432 if (item == NULL)
1433 goto fail;
1434 err = PyList_Append(list, item);
1435 Py_DECREF(item);
1436 if (err < 0)
1437 goto fail;
1438 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 }
1440 else
1441 i++;
1442 }
1443 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1444 if (item == NULL)
1445 goto fail;
1446 err = PyList_Append(list, item);
1447 Py_DECREF(item);
1448 if (err < 0)
1449 goto fail;
1450
1451 return list;
1452
1453 fail:
1454 Py_DECREF(list);
1455 return NULL;
1456}
1457
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001458static PyObject *
1459rsplit_whitespace(const char *s, int len, int maxsplit)
1460{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001461 int i, j;
1462 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001463 PyObject *list = PyList_New(0);
1464
1465 if (list == NULL)
1466 return NULL;
1467
1468 for (i = j = len - 1; i >= 0; ) {
1469 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1470 i--;
1471 j = i;
1472 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1473 i--;
1474 if (j > i) {
1475 if (maxsplit-- <= 0)
1476 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001478 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1479 i--;
1480 j = i;
1481 }
1482 }
1483 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001484 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001485 }
1486 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001487 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001488 Py_DECREF(list);
1489 return NULL;
1490}
1491
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001492static PyObject *
1493rsplit_char(const char *s, int len, char ch, int maxcount)
1494{
1495 register int i, j;
1496 PyObject *str;
1497 PyObject *list = PyList_New(0);
1498
1499 if (list == NULL)
1500 return NULL;
1501
1502 for (i = j = len - 1; i >= 0; ) {
1503 if (s[i] == ch) {
1504 if (maxcount-- <= 0)
1505 break;
1506 SPLIT_INSERT(s, i + 1, j + 1);
1507 j = i = i - 1;
1508 } else
1509 i--;
1510 }
1511 if (j >= -1) {
1512 SPLIT_INSERT(s, 0, j + 1);
1513 }
1514 return list;
1515
1516 onError:
1517 Py_DECREF(list);
1518 return NULL;
1519}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001520
1521PyDoc_STRVAR(rsplit__doc__,
1522"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1523\n\
1524Return a list of the words in the string S, using sep as the\n\
1525delimiter string, starting at the end of the string and working\n\
1526to the front. If maxsplit is given, at most maxsplit splits are\n\
1527done. If sep is not specified or is None, any whitespace string\n\
1528is a separator.");
1529
1530static PyObject *
1531string_rsplit(PyStringObject *self, PyObject *args)
1532{
1533 int len = PyString_GET_SIZE(self), n, i, j, err;
1534 int maxsplit = -1;
1535 const char *s = PyString_AS_STRING(self), *sub;
1536 PyObject *list, *item, *subobj = Py_None;
1537
1538 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1539 return NULL;
1540 if (maxsplit < 0)
1541 maxsplit = INT_MAX;
1542 if (subobj == Py_None)
1543 return rsplit_whitespace(s, len, maxsplit);
1544 if (PyString_Check(subobj)) {
1545 sub = PyString_AS_STRING(subobj);
1546 n = PyString_GET_SIZE(subobj);
1547 }
1548#ifdef Py_USING_UNICODE
1549 else if (PyUnicode_Check(subobj))
1550 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1551#endif
1552 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1553 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001554
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001555 if (n == 0) {
1556 PyErr_SetString(PyExc_ValueError, "empty separator");
1557 return NULL;
1558 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001559 else if (n == 1)
1560 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001561
1562 list = PyList_New(0);
1563 if (list == NULL)
1564 return NULL;
1565
1566 j = len;
1567 i = j - n;
1568 while (i >= 0) {
1569 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1570 if (maxsplit-- <= 0)
1571 break;
1572 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1573 if (item == NULL)
1574 goto fail;
1575 err = PyList_Insert(list, 0, item);
1576 Py_DECREF(item);
1577 if (err < 0)
1578 goto fail;
1579 j = i;
1580 i -= n;
1581 }
1582 else
1583 i--;
1584 }
1585 item = PyString_FromStringAndSize(s, j);
1586 if (item == NULL)
1587 goto fail;
1588 err = PyList_Insert(list, 0, item);
1589 Py_DECREF(item);
1590 if (err < 0)
1591 goto fail;
1592
1593 return list;
1594
1595 fail:
1596 Py_DECREF(list);
1597 return NULL;
1598}
1599
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001601PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602"S.join(sequence) -> string\n\
1603\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001604Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001605sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001606
1607static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001608string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609{
1610 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001611 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613 char *p;
1614 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001615 size_t sz = 0;
1616 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001617 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618
Tim Peters19fe14e2001-01-19 03:03:47 +00001619 seq = PySequence_Fast(orig, "");
1620 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001621 if (PyErr_ExceptionMatches(PyExc_TypeError))
1622 PyErr_Format(PyExc_TypeError,
1623 "sequence expected, %.80s found",
1624 orig->ob_type->tp_name);
1625 return NULL;
1626 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001627
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001628 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001629 if (seqlen == 0) {
1630 Py_DECREF(seq);
1631 return PyString_FromString("");
1632 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001634 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001635 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1636 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001637 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001638 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001639 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001641
Raymond Hettinger674f2412004-08-23 23:23:54 +00001642 /* There are at least two things to join, or else we have a subclass
1643 * of the builtin types in the sequence.
1644 * Do a pre-pass to figure out the total amount of space we'll
1645 * need (sz), see whether any argument is absurd, and defer to
1646 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001647 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001648 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001649 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001650 item = PySequence_Fast_GET_ITEM(seq, i);
1651 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001652#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001653 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001654 /* Defer to Unicode join.
1655 * CAUTION: There's no gurantee that the
1656 * original sequence can be iterated over
1657 * again, so we must pass seq here.
1658 */
1659 PyObject *result;
1660 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001661 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001662 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001663 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001664#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001665 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001666 "sequence item %i: expected string,"
1667 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001668 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001669 Py_DECREF(seq);
1670 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001671 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001672 sz += PyString_GET_SIZE(item);
1673 if (i != 0)
1674 sz += seplen;
1675 if (sz < old_sz || sz > INT_MAX) {
1676 PyErr_SetString(PyExc_OverflowError,
1677 "join() is too long for a Python string");
1678 Py_DECREF(seq);
1679 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001680 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001681 }
1682
1683 /* Allocate result space. */
1684 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1685 if (res == NULL) {
1686 Py_DECREF(seq);
1687 return NULL;
1688 }
1689
1690 /* Catenate everything. */
1691 p = PyString_AS_STRING(res);
1692 for (i = 0; i < seqlen; ++i) {
1693 size_t n;
1694 item = PySequence_Fast_GET_ITEM(seq, i);
1695 n = PyString_GET_SIZE(item);
1696 memcpy(p, PyString_AS_STRING(item), n);
1697 p += n;
1698 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001699 memcpy(p, sep, seplen);
1700 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001701 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001703
Jeremy Hylton49048292000-07-11 03:28:17 +00001704 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706}
1707
Tim Peters52e155e2001-06-16 05:42:57 +00001708PyObject *
1709_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001710{
Tim Petersa7259592001-06-16 05:11:17 +00001711 assert(sep != NULL && PyString_Check(sep));
1712 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001713 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001714}
1715
/* Normalise a (start, end) pair in place for a string of length len.
 *
 * end:   values above len are cut down to len; negative values have len
 *        added to them and are then floored at 0.
 * start: negative values have len added to them and are then floored
 *        at 0.  start is NOT capped at len.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int bound;

    bound = *end;
    if (bound > len)
        bound = len;
    else if (bound < 0)
        bound += len;
    if (bound < 0)
        bound = 0;
    *end = bound;

    bound = *start;
    if (bound < 0) {
        bound += len;
        if (bound < 0)
            bound = 0;
    }
    *start = bound;
}
1730
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731static long
Fred Drakeba096332000-07-09 07:04:36 +00001732string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735 int len = PyString_GET_SIZE(self);
1736 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001739 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001740 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001741 return -2;
1742 if (PyString_Check(subobj)) {
1743 sub = PyString_AS_STRING(subobj);
1744 n = PyString_GET_SIZE(subobj);
1745 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001746#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001747 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001748 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001749#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751 return -2;
1752
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001753 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 if (dir > 0) {
1756 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001758 last -= n;
1759 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001760 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001761 return (long)i;
1762 }
1763 else {
1764 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001765
Guido van Rossum4c08d552000-03-10 22:55:18 +00001766 if (n == 0 && i <= last)
1767 return (long)last;
1768 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001769 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001770 return (long)j;
1771 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001772
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773 return -1;
1774}
1775
1776
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001777PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778"S.find(sub [,start [,end]]) -> int\n\
1779\n\
1780Return the lowest index in S where substring sub is found,\n\
1781such that sub is contained within s[start,end]. Optional\n\
1782arguments start and end are interpreted as in slice notation.\n\
1783\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001784Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785
1786static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001787string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001789 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790 if (result == -2)
1791 return NULL;
1792 return PyInt_FromLong(result);
1793}
1794
1795
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001796PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797"S.index(sub [,start [,end]]) -> int\n\
1798\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001799Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800
1801static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001802string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001804 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805 if (result == -2)
1806 return NULL;
1807 if (result == -1) {
1808 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001809 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810 return NULL;
1811 }
1812 return PyInt_FromLong(result);
1813}
1814
1815
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001816PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817"S.rfind(sub [,start [,end]]) -> int\n\
1818\n\
1819Return the highest index in S where substring sub is found,\n\
1820such that sub is contained within s[start,end]. Optional\n\
1821arguments start and end are interpreted as in slice notation.\n\
1822\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001823Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824
1825static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001826string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001828 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829 if (result == -2)
1830 return NULL;
1831 return PyInt_FromLong(result);
1832}
1833
1834
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001835PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836"S.rindex(sub [,start [,end]]) -> int\n\
1837\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001838Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839
1840static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001841string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001843 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844 if (result == -2)
1845 return NULL;
1846 if (result == -1) {
1847 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001848 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849 return NULL;
1850 }
1851 return PyInt_FromLong(result);
1852}
1853
1854
1855static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001856do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1857{
1858 char *s = PyString_AS_STRING(self);
1859 int len = PyString_GET_SIZE(self);
1860 char *sep = PyString_AS_STRING(sepobj);
1861 int seplen = PyString_GET_SIZE(sepobj);
1862 int i, j;
1863
1864 i = 0;
1865 if (striptype != RIGHTSTRIP) {
1866 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1867 i++;
1868 }
1869 }
1870
1871 j = len;
1872 if (striptype != LEFTSTRIP) {
1873 do {
1874 j--;
1875 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1876 j++;
1877 }
1878
1879 if (i == 0 && j == len && PyString_CheckExact(self)) {
1880 Py_INCREF(self);
1881 return (PyObject*)self;
1882 }
1883 else
1884 return PyString_FromStringAndSize(s+i, j-i);
1885}
1886
1887
1888static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001889do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001890{
1891 char *s = PyString_AS_STRING(self);
1892 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894 i = 0;
1895 if (striptype != RIGHTSTRIP) {
1896 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1897 i++;
1898 }
1899 }
1900
1901 j = len;
1902 if (striptype != LEFTSTRIP) {
1903 do {
1904 j--;
1905 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1906 j++;
1907 }
1908
Tim Peters8fa5dd02001-09-12 02:18:30 +00001909 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910 Py_INCREF(self);
1911 return (PyObject*)self;
1912 }
1913 else
1914 return PyString_FromStringAndSize(s+i, j-i);
1915}
1916
1917
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001918static PyObject *
1919do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1920{
1921 PyObject *sep = NULL;
1922
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001923 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001924 return NULL;
1925
1926 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001927 if (PyString_Check(sep))
1928 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001929#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001930 else if (PyUnicode_Check(sep)) {
1931 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1932 PyObject *res;
1933 if (uniself==NULL)
1934 return NULL;
1935 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1936 striptype, sep);
1937 Py_DECREF(uniself);
1938 return res;
1939 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001940#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001941 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001942 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001943#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001944 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001945#else
1946 "%s arg must be None or str",
1947#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001948 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001949 return NULL;
1950 }
1951 return do_xstrip(self, striptype, sep);
1952 }
1953
1954 return do_strip(self, striptype);
1955}
1956
1957
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001958PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001959"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960\n\
1961Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001962whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001963If chars is given and not None, remove characters in chars instead.\n\
1964If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965
1966static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001967string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001968{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001969 if (PyTuple_GET_SIZE(args) == 0)
1970 return do_strip(self, BOTHSTRIP); /* Common case */
1971 else
1972 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973}
1974
1975
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001976PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001977"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001979Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001980If chars is given and not None, remove characters in chars instead.\n\
1981If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982
1983static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001984string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001985{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001986 if (PyTuple_GET_SIZE(args) == 0)
1987 return do_strip(self, LEFTSTRIP); /* Common case */
1988 else
1989 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990}
1991
1992
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001993PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001994"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001996Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001997If chars is given and not None, remove characters in chars instead.\n\
1998If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999
2000static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002001string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002003 if (PyTuple_GET_SIZE(args) == 0)
2004 return do_strip(self, RIGHTSTRIP); /* Common case */
2005 else
2006 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007}
2008
2009
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002010PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011"S.lower() -> string\n\
2012\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002013Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014
2015static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002016string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017{
2018 char *s = PyString_AS_STRING(self), *s_new;
2019 int i, n = PyString_GET_SIZE(self);
2020 PyObject *new;
2021
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022 new = PyString_FromStringAndSize(NULL, n);
2023 if (new == NULL)
2024 return NULL;
2025 s_new = PyString_AsString(new);
2026 for (i = 0; i < n; i++) {
2027 int c = Py_CHARMASK(*s++);
2028 if (isupper(c)) {
2029 *s_new = tolower(c);
2030 } else
2031 *s_new = c;
2032 s_new++;
2033 }
2034 return new;
2035}
2036
2037
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002038PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039"S.upper() -> string\n\
2040\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002041Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042
2043static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002044string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045{
2046 char *s = PyString_AS_STRING(self), *s_new;
2047 int i, n = PyString_GET_SIZE(self);
2048 PyObject *new;
2049
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002050 new = PyString_FromStringAndSize(NULL, n);
2051 if (new == NULL)
2052 return NULL;
2053 s_new = PyString_AsString(new);
2054 for (i = 0; i < n; i++) {
2055 int c = Py_CHARMASK(*s++);
2056 if (islower(c)) {
2057 *s_new = toupper(c);
2058 } else
2059 *s_new = c;
2060 s_new++;
2061 }
2062 return new;
2063}
2064
2065
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002066PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002067"S.title() -> string\n\
2068\n\
2069Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002070characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071
2072static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002073string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002074{
2075 char *s = PyString_AS_STRING(self), *s_new;
2076 int i, n = PyString_GET_SIZE(self);
2077 int previous_is_cased = 0;
2078 PyObject *new;
2079
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080 new = PyString_FromStringAndSize(NULL, n);
2081 if (new == NULL)
2082 return NULL;
2083 s_new = PyString_AsString(new);
2084 for (i = 0; i < n; i++) {
2085 int c = Py_CHARMASK(*s++);
2086 if (islower(c)) {
2087 if (!previous_is_cased)
2088 c = toupper(c);
2089 previous_is_cased = 1;
2090 } else if (isupper(c)) {
2091 if (previous_is_cased)
2092 c = tolower(c);
2093 previous_is_cased = 1;
2094 } else
2095 previous_is_cased = 0;
2096 *s_new++ = c;
2097 }
2098 return new;
2099}
2100
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002101PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102"S.capitalize() -> string\n\
2103\n\
2104Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002105capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106
2107static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002108string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002109{
2110 char *s = PyString_AS_STRING(self), *s_new;
2111 int i, n = PyString_GET_SIZE(self);
2112 PyObject *new;
2113
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114 new = PyString_FromStringAndSize(NULL, n);
2115 if (new == NULL)
2116 return NULL;
2117 s_new = PyString_AsString(new);
2118 if (0 < n) {
2119 int c = Py_CHARMASK(*s++);
2120 if (islower(c))
2121 *s_new = toupper(c);
2122 else
2123 *s_new = c;
2124 s_new++;
2125 }
2126 for (i = 1; i < n; i++) {
2127 int c = Py_CHARMASK(*s++);
2128 if (isupper(c))
2129 *s_new = tolower(c);
2130 else
2131 *s_new = c;
2132 s_new++;
2133 }
2134 return new;
2135}
2136
2137
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002138PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139"S.count(sub[, start[, end]]) -> int\n\
2140\n\
2141Return the number of occurrences of substring sub in string\n\
2142S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002143interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002144
2145static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002146string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002148 const char *s = PyString_AS_STRING(self), *sub, *t;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149 int len = PyString_GET_SIZE(self), n;
2150 int i = 0, last = INT_MAX;
2151 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002152 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153
Guido van Rossumc6821402000-05-08 14:08:05 +00002154 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2155 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002157
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158 if (PyString_Check(subobj)) {
2159 sub = PyString_AS_STRING(subobj);
2160 n = PyString_GET_SIZE(subobj);
2161 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002162#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002163 else if (PyUnicode_Check(subobj)) {
2164 int count;
2165 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2166 if (count == -1)
2167 return NULL;
2168 else
2169 return PyInt_FromLong((long) count);
2170 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002171#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2173 return NULL;
2174
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002175 string_adjust_indices(&i, &last, len);
2176
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177 m = last + 1 - n;
2178 if (n == 0)
2179 return PyInt_FromLong((long) (m-i));
2180
2181 r = 0;
2182 while (i < m) {
2183 if (!memcmp(s+i, sub, n)) {
2184 r++;
2185 i += n;
2186 } else {
2187 i++;
2188 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002189 if (i >= m)
2190 break;
2191 t = memchr(s+i, sub[0], m-i);
2192 if (t == NULL)
2193 break;
2194 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002195 }
2196 return PyInt_FromLong((long) r);
2197}
2198
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002199PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002200"S.swapcase() -> string\n\
2201\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002203converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002204
2205static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002206string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207{
2208 char *s = PyString_AS_STRING(self), *s_new;
2209 int i, n = PyString_GET_SIZE(self);
2210 PyObject *new;
2211
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212 new = PyString_FromStringAndSize(NULL, n);
2213 if (new == NULL)
2214 return NULL;
2215 s_new = PyString_AsString(new);
2216 for (i = 0; i < n; i++) {
2217 int c = Py_CHARMASK(*s++);
2218 if (islower(c)) {
2219 *s_new = toupper(c);
2220 }
2221 else if (isupper(c)) {
2222 *s_new = tolower(c);
2223 }
2224 else
2225 *s_new = c;
2226 s_new++;
2227 }
2228 return new;
2229}
2230
2231
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002232PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233"S.translate(table [,deletechars]) -> string\n\
2234\n\
2235Return a copy of the string S, where all characters occurring\n\
2236in the optional argument deletechars are removed, and the\n\
2237remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002238translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239
2240static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002241string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002242{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002243 register char *input, *output;
2244 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002245 register int i, c, changed = 0;
2246 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248 int inlen, tablen, dellen = 0;
2249 PyObject *result;
2250 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002251 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002253 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256
2257 if (PyString_Check(tableobj)) {
2258 table1 = PyString_AS_STRING(tableobj);
2259 tablen = PyString_GET_SIZE(tableobj);
2260 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002261#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002263 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264 parameter; instead a mapping to None will cause characters
2265 to be deleted. */
2266 if (delobj != NULL) {
2267 PyErr_SetString(PyExc_TypeError,
2268 "deletions are implemented differently for unicode");
2269 return NULL;
2270 }
2271 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2272 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002273#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276
Martin v. Löwis00b61272002-12-12 20:03:19 +00002277 if (tablen != 256) {
2278 PyErr_SetString(PyExc_ValueError,
2279 "translation table must be 256 characters long");
2280 return NULL;
2281 }
2282
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 if (delobj != NULL) {
2284 if (PyString_Check(delobj)) {
2285 del_table = PyString_AS_STRING(delobj);
2286 dellen = PyString_GET_SIZE(delobj);
2287 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002288#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002289 else if (PyUnicode_Check(delobj)) {
2290 PyErr_SetString(PyExc_TypeError,
2291 "deletions are implemented differently for unicode");
2292 return NULL;
2293 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002294#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2296 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002297 }
2298 else {
2299 del_table = NULL;
2300 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301 }
2302
2303 table = table1;
2304 inlen = PyString_Size(input_obj);
2305 result = PyString_FromStringAndSize((char *)NULL, inlen);
2306 if (result == NULL)
2307 return NULL;
2308 output_start = output = PyString_AsString(result);
2309 input = PyString_AsString(input_obj);
2310
2311 if (dellen == 0) {
2312 /* If no deletions are required, use faster code */
2313 for (i = inlen; --i >= 0; ) {
2314 c = Py_CHARMASK(*input++);
2315 if (Py_CHARMASK((*output++ = table[c])) != c)
2316 changed = 1;
2317 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002318 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319 return result;
2320 Py_DECREF(result);
2321 Py_INCREF(input_obj);
2322 return input_obj;
2323 }
2324
2325 for (i = 0; i < 256; i++)
2326 trans_table[i] = Py_CHARMASK(table[i]);
2327
2328 for (i = 0; i < dellen; i++)
2329 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2330
2331 for (i = inlen; --i >= 0; ) {
2332 c = Py_CHARMASK(*input++);
2333 if (trans_table[c] != -1)
2334 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2335 continue;
2336 changed = 1;
2337 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002338 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339 Py_DECREF(result);
2340 Py_INCREF(input_obj);
2341 return input_obj;
2342 }
2343 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002344 if (inlen > 0)
2345 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346 return result;
2347}
2348
2349
2350/* What follows is used for implementing replace(). Perry Stoll. */
2351
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first position at which the
  PAT_LEN bytes at PAT occur, and returns that index.  Returns -1 when
  there is no such position, which includes the case where PAT_LEN is
  greater than LEN.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest index at which a full pattern still fits. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
2377
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming just past each match.
  E.g. mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also
  gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        int at, consumed;

        at = mymemfind(mem, len, pat, pat_len);
        if (at == -1)
            break;
        /* Skip everything up to and including this match. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
2401
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "replace all".  An empty PAT matches before every byte
   of STR and once more at the end.

   The original string is returned unchanged (and no memory is
   allocated) when nothing would be replaced: STR is empty, PAT and SUB
   are both empty, PAT is longer than STR, PAT does not occur in STR,
   or COUNT is 0.  Otherwise, a new string is allocated here with
   PyMem_MALLOC() and returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       the input string STR (see above), or
       NULL if an error occurred (out of memory, or the length of the
       result cannot be represented as an int).
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
	     const char *pat, int pat_len,	/* pattern string to find */
	     const char *sub, int sub_len,	/* substitution string */
	     int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Each replacement changes the length by delta bytes.  Growth can
	   exceed INT_MAX, and a wrapped new_len would size the buffer too
	   small for the copies below, so refuse up front.  Shrinking
	   cannot go wrong: the nfound matches are non-overlapping, hence
	   nfound*pat_len <= len. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;

	new_len = len + nfound*delta;
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* Empty pattern: emit SUB, then one input byte,
			   repeatedly; once the replacement budget is used
			   up, copy the rest of the input verbatim. */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2499
2500
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002501PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002502"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002503\n\
2504Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002505old replaced by new. If the optional argument count is\n\
2506given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002507
2508static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002509string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002510{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002511 const char *str = PyString_AS_STRING(self), *sub, *repl;
2512 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002513 const int len = PyString_GET_SIZE(self);
2514 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002516 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002518
Guido van Rossum4c08d552000-03-10 22:55:18 +00002519 if (!PyArg_ParseTuple(args, "OO|i:replace",
2520 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522
2523 if (PyString_Check(subobj)) {
2524 sub = PyString_AS_STRING(subobj);
2525 sub_len = PyString_GET_SIZE(subobj);
2526 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002527#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002529 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002531#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2533 return NULL;
2534
2535 if (PyString_Check(replobj)) {
2536 repl = PyString_AS_STRING(replobj);
2537 repl_len = PyString_GET_SIZE(replobj);
2538 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002539#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002540 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002541 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002543#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2545 return NULL;
2546
Guido van Rossum4c08d552000-03-10 22:55:18 +00002547 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002548 if (new_s == NULL) {
2549 PyErr_NoMemory();
2550 return NULL;
2551 }
2552 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002553 if (PyString_CheckExact(self)) {
2554 /* we're returning another reference to self */
2555 new = (PyObject*)self;
2556 Py_INCREF(new);
2557 }
2558 else {
2559 new = PyString_FromStringAndSize(str, len);
2560 if (new == NULL)
2561 return NULL;
2562 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002563 }
2564 else {
2565 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002566 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002567 }
2568 return new;
2569}
2570
2571
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002572PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002573"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002574\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002575Return True if S starts with the specified prefix, False otherwise.\n\
2576With optional start, test S beginning at that position.\n\
2577With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002578
2579static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002580string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002581{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002583 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002585 int plen;
2586 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002587 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002588 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002589
Guido van Rossumc6821402000-05-08 14:08:05 +00002590 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2591 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002592 return NULL;
2593 if (PyString_Check(subobj)) {
2594 prefix = PyString_AS_STRING(subobj);
2595 plen = PyString_GET_SIZE(subobj);
2596 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002597#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002598 else if (PyUnicode_Check(subobj)) {
2599 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002600 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002601 subobj, start, end, -1);
2602 if (rc == -1)
2603 return NULL;
2604 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002605 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002606 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002607#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002608 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002609 return NULL;
2610
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002611 string_adjust_indices(&start, &end, len);
2612
2613 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002614 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002615
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002616 if (end-start >= plen)
2617 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2618 else
2619 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002620}
2621
2622
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002623PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002624"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002625\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002626Return True if S ends with the specified suffix, False otherwise.\n\
2627With optional start, test S beginning at that position.\n\
2628With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002629
2630static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002631string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002632{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002634 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 const char* suffix;
2636 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002637 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002638 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002639 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002640
Guido van Rossumc6821402000-05-08 14:08:05 +00002641 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2642 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002643 return NULL;
2644 if (PyString_Check(subobj)) {
2645 suffix = PyString_AS_STRING(subobj);
2646 slen = PyString_GET_SIZE(subobj);
2647 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002648#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002649 else if (PyUnicode_Check(subobj)) {
2650 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002651 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002652 subobj, start, end, +1);
2653 if (rc == -1)
2654 return NULL;
2655 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002656 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002657 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002658#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002659 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002660 return NULL;
2661
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002662 string_adjust_indices(&start, &end, len);
2663
2664 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002665 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002666
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002667 if (end-slen > start)
2668 start = end - slen;
2669 if (end-start >= slen)
2670 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2671 else
2672 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002673}
2674
2675
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002676PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002677"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002678\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002679Encodes S using the codec registered for encoding. encoding defaults\n\
2680to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002681handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002682a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2683'xmlcharrefreplace' as well as any other name registered with\n\
2684codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002685
2686static PyObject *
2687string_encode(PyStringObject *self, PyObject *args)
2688{
2689 char *encoding = NULL;
2690 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002691 PyObject *v;
2692
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002693 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2694 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002695 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002696 if (v == NULL)
2697 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002698 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2699 PyErr_Format(PyExc_TypeError,
2700 "encoder did not return a string/unicode object "
2701 "(type=%.400s)",
2702 v->ob_type->tp_name);
2703 Py_DECREF(v);
2704 return NULL;
2705 }
2706 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002707
2708 onError:
2709 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002710}
2711
2712
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002713PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002714"S.decode([encoding[,errors]]) -> object\n\
2715\n\
2716Decodes S using the codec registered for encoding. encoding defaults\n\
2717to the default encoding. errors may be given to set a different error\n\
2718handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002719a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2720as well as any other name registerd with codecs.register_error that is\n\
2721able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002722
2723static PyObject *
2724string_decode(PyStringObject *self, PyObject *args)
2725{
2726 char *encoding = NULL;
2727 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002728 PyObject *v;
2729
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002730 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2731 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002732 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002733 if (v == NULL)
2734 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002735 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2736 PyErr_Format(PyExc_TypeError,
2737 "decoder did not return a string/unicode object "
2738 "(type=%.400s)",
2739 v->ob_type->tp_name);
2740 Py_DECREF(v);
2741 return NULL;
2742 }
2743 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002744
2745 onError:
2746 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002747}
2748
2749
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002750PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002751"S.expandtabs([tabsize]) -> string\n\
2752\n\
2753Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002754If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002755
2756static PyObject*
2757string_expandtabs(PyStringObject *self, PyObject *args)
2758{
2759 const char *e, *p;
2760 char *q;
2761 int i, j;
2762 PyObject *u;
2763 int tabsize = 8;
2764
2765 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2766 return NULL;
2767
Thomas Wouters7e474022000-07-16 12:04:32 +00002768 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002769 i = j = 0;
2770 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2771 for (p = PyString_AS_STRING(self); p < e; p++)
2772 if (*p == '\t') {
2773 if (tabsize > 0)
2774 j += tabsize - (j % tabsize);
2775 }
2776 else {
2777 j++;
2778 if (*p == '\n' || *p == '\r') {
2779 i += j;
2780 j = 0;
2781 }
2782 }
2783
2784 /* Second pass: create output string and fill it */
2785 u = PyString_FromStringAndSize(NULL, i + j);
2786 if (!u)
2787 return NULL;
2788
2789 j = 0;
2790 q = PyString_AS_STRING(u);
2791
2792 for (p = PyString_AS_STRING(self); p < e; p++)
2793 if (*p == '\t') {
2794 if (tabsize > 0) {
2795 i = tabsize - (j % tabsize);
2796 j += i;
2797 while (i--)
2798 *q++ = ' ';
2799 }
2800 }
2801 else {
2802 j++;
2803 *q++ = *p;
2804 if (*p == '\n' || *p == '\r')
2805 j = 0;
2806 }
2807
2808 return u;
2809}
2810
Tim Peters8fa5dd02001-09-12 02:18:30 +00002811static PyObject *
2812pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002813{
2814 PyObject *u;
2815
2816 if (left < 0)
2817 left = 0;
2818 if (right < 0)
2819 right = 0;
2820
Tim Peters8fa5dd02001-09-12 02:18:30 +00002821 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002822 Py_INCREF(self);
2823 return (PyObject *)self;
2824 }
2825
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002826 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002827 left + PyString_GET_SIZE(self) + right);
2828 if (u) {
2829 if (left)
2830 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002831 memcpy(PyString_AS_STRING(u) + left,
2832 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002833 PyString_GET_SIZE(self));
2834 if (right)
2835 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2836 fill, right);
2837 }
2838
2839 return u;
2840}
2841
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002842PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002843"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002844"\n"
2845"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002846"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002847
2848static PyObject *
2849string_ljust(PyStringObject *self, PyObject *args)
2850{
2851 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002852 char fillchar = ' ';
2853
2854 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002855 return NULL;
2856
Tim Peters8fa5dd02001-09-12 02:18:30 +00002857 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002858 Py_INCREF(self);
2859 return (PyObject*) self;
2860 }
2861
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002862 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002863}
2864
2865
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002866PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002867"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002868"\n"
2869"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002870"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871
2872static PyObject *
2873string_rjust(PyStringObject *self, PyObject *args)
2874{
2875 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002876 char fillchar = ' ';
2877
2878 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002879 return NULL;
2880
Tim Peters8fa5dd02001-09-12 02:18:30 +00002881 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002882 Py_INCREF(self);
2883 return (PyObject*) self;
2884 }
2885
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002886 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002887}
2888
2889
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002890PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002891"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002892"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002893"Return S centered in a string of length width. Padding is\n"
2894"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002895
2896static PyObject *
2897string_center(PyStringObject *self, PyObject *args)
2898{
2899 int marg, left;
2900 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002901 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002902
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002903 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002904 return NULL;
2905
Tim Peters8fa5dd02001-09-12 02:18:30 +00002906 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002907 Py_INCREF(self);
2908 return (PyObject*) self;
2909 }
2910
2911 marg = width - PyString_GET_SIZE(self);
2912 left = marg / 2 + (marg & width & 1);
2913
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002914 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002915}
2916
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002917PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002918"S.zfill(width) -> string\n"
2919"\n"
2920"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002921"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002922
2923static PyObject *
2924string_zfill(PyStringObject *self, PyObject *args)
2925{
2926 int fill;
2927 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002928 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002929
2930 int width;
2931 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2932 return NULL;
2933
2934 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002935 if (PyString_CheckExact(self)) {
2936 Py_INCREF(self);
2937 return (PyObject*) self;
2938 }
2939 else
2940 return PyString_FromStringAndSize(
2941 PyString_AS_STRING(self),
2942 PyString_GET_SIZE(self)
2943 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002944 }
2945
2946 fill = width - PyString_GET_SIZE(self);
2947
2948 s = pad(self, fill, 0, '0');
2949
2950 if (s == NULL)
2951 return NULL;
2952
2953 p = PyString_AS_STRING(s);
2954 if (p[fill] == '+' || p[fill] == '-') {
2955 /* move sign to beginning of string */
2956 p[0] = p[fill];
2957 p[fill] = '0';
2958 }
2959
2960 return (PyObject*) s;
2961}
2962
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002963PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002964"S.isspace() -> bool\n\
2965\n\
2966Return True if all characters in S are whitespace\n\
2967and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002968
2969static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002970string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002971{
Fred Drakeba096332000-07-09 07:04:36 +00002972 register const unsigned char *p
2973 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002974 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002975
Guido van Rossum4c08d552000-03-10 22:55:18 +00002976 /* Shortcut for single character strings */
2977 if (PyString_GET_SIZE(self) == 1 &&
2978 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002979 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002980
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002981 /* Special case for empty strings */
2982 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002983 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002984
Guido van Rossum4c08d552000-03-10 22:55:18 +00002985 e = p + PyString_GET_SIZE(self);
2986 for (; p < e; p++) {
2987 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002988 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002989 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002990 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002991}
2992
2993
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002994PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002995"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002996\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002997Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002998and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002999
3000static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003001string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003002{
Fred Drakeba096332000-07-09 07:04:36 +00003003 register const unsigned char *p
3004 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003005 register const unsigned char *e;
3006
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003007 /* Shortcut for single character strings */
3008 if (PyString_GET_SIZE(self) == 1 &&
3009 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003010 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003011
3012 /* Special case for empty strings */
3013 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003014 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003015
3016 e = p + PyString_GET_SIZE(self);
3017 for (; p < e; p++) {
3018 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003019 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003020 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003021 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003022}
3023
3024
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003025PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003026"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003027\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003028Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003029and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003030
3031static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003032string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003033{
Fred Drakeba096332000-07-09 07:04:36 +00003034 register const unsigned char *p
3035 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003036 register const unsigned char *e;
3037
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003038 /* Shortcut for single character strings */
3039 if (PyString_GET_SIZE(self) == 1 &&
3040 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003041 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003042
3043 /* Special case for empty strings */
3044 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003045 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003046
3047 e = p + PyString_GET_SIZE(self);
3048 for (; p < e; p++) {
3049 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003050 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003051 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003052 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003053}
3054
3055
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003056PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003057"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003058\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003059Return True if all characters in S are digits\n\
3060and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003061
3062static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003063string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003064{
Fred Drakeba096332000-07-09 07:04:36 +00003065 register const unsigned char *p
3066 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003067 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003068
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069 /* Shortcut for single character strings */
3070 if (PyString_GET_SIZE(self) == 1 &&
3071 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003072 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003073
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003074 /* Special case for empty strings */
3075 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003076 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003077
Guido van Rossum4c08d552000-03-10 22:55:18 +00003078 e = p + PyString_GET_SIZE(self);
3079 for (; p < e; p++) {
3080 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003081 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003083 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084}
3085
3086
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003087PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003088"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003089\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003090Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003091at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003092
3093static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003094string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003095{
Fred Drakeba096332000-07-09 07:04:36 +00003096 register const unsigned char *p
3097 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003098 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003099 int cased;
3100
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101 /* Shortcut for single character strings */
3102 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003103 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003104
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003105 /* Special case for empty strings */
3106 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003107 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003108
Guido van Rossum4c08d552000-03-10 22:55:18 +00003109 e = p + PyString_GET_SIZE(self);
3110 cased = 0;
3111 for (; p < e; p++) {
3112 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003113 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114 else if (!cased && islower(*p))
3115 cased = 1;
3116 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003117 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003118}
3119
3120
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003121PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003122"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003123\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003124Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003125at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003126
3127static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003128string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003129{
Fred Drakeba096332000-07-09 07:04:36 +00003130 register const unsigned char *p
3131 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003132 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003133 int cased;
3134
Guido van Rossum4c08d552000-03-10 22:55:18 +00003135 /* Shortcut for single character strings */
3136 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003137 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003138
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003139 /* Special case for empty strings */
3140 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003141 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003142
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143 e = p + PyString_GET_SIZE(self);
3144 cased = 0;
3145 for (; p < e; p++) {
3146 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003147 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003148 else if (!cased && isupper(*p))
3149 cased = 1;
3150 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003151 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003152}
3153
3154
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003155PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003156"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003158Return True if S is a titlecased string and there is at least one\n\
3159character in S, i.e. uppercase characters may only follow uncased\n\
3160characters and lowercase characters only cased ones. Return False\n\
3161otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003162
3163static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003164string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003165{
Fred Drakeba096332000-07-09 07:04:36 +00003166 register const unsigned char *p
3167 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003168 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 int cased, previous_is_cased;
3170
Guido van Rossum4c08d552000-03-10 22:55:18 +00003171 /* Shortcut for single character strings */
3172 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003173 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003174
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003175 /* Special case for empty strings */
3176 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003177 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003178
Guido van Rossum4c08d552000-03-10 22:55:18 +00003179 e = p + PyString_GET_SIZE(self);
3180 cased = 0;
3181 previous_is_cased = 0;
3182 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003183 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003184
3185 if (isupper(ch)) {
3186 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003187 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003188 previous_is_cased = 1;
3189 cased = 1;
3190 }
3191 else if (islower(ch)) {
3192 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003193 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003194 previous_is_cased = 1;
3195 cased = 1;
3196 }
3197 else
3198 previous_is_cased = 0;
3199 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003200 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003201}
3202
3203
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003204PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003205"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003206\n\
3207Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003208Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003209is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210
Guido van Rossum4c08d552000-03-10 22:55:18 +00003211static PyObject*
3212string_splitlines(PyStringObject *self, PyObject *args)
3213{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003214 register int i;
3215 register int j;
3216 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003217 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003218 PyObject *list;
3219 PyObject *str;
3220 char *data;
3221
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003222 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003223 return NULL;
3224
3225 data = PyString_AS_STRING(self);
3226 len = PyString_GET_SIZE(self);
3227
Guido van Rossum4c08d552000-03-10 22:55:18 +00003228 list = PyList_New(0);
3229 if (!list)
3230 goto onError;
3231
3232 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003233 int eol;
3234
Guido van Rossum4c08d552000-03-10 22:55:18 +00003235 /* Find a line and append it */
3236 while (i < len && data[i] != '\n' && data[i] != '\r')
3237 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003238
3239 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003240 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003241 if (i < len) {
3242 if (data[i] == '\r' && i + 1 < len &&
3243 data[i+1] == '\n')
3244 i += 2;
3245 else
3246 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003247 if (keepends)
3248 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003249 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003250 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003251 j = i;
3252 }
3253 if (j < len) {
3254 SPLIT_APPEND(data, j, len);
3255 }
3256
3257 return list;
3258
3259 onError:
3260 Py_DECREF(list);
3261 return NULL;
3262}
3263
3264#undef SPLIT_APPEND
3265
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003266static PyObject *
3267string_getnewargs(PyStringObject *v)
3268{
3269 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3270}
3271
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003272
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003273static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003274string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003275 /* Counterparts of the obsolete stropmodule functions; except
3276 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003277 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3278 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003279 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003280 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3281 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003282 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3283 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3284 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3285 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3286 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3287 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3288 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003289 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3290 capitalize__doc__},
3291 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3292 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3293 endswith__doc__},
3294 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3295 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3296 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3297 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3298 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3299 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3300 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3301 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3302 startswith__doc__},
3303 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3304 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3305 swapcase__doc__},
3306 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3307 translate__doc__},
3308 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3309 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3310 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3311 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3312 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3313 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3314 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3315 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3316 expandtabs__doc__},
3317 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3318 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003319 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003320 {NULL, NULL} /* sentinel */
3321};
3322
Jeremy Hylton938ace62002-07-17 16:30:39 +00003323static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003324str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3325
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003326static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003327string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003328{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003329 PyObject *x = NULL;
3330 static char *kwlist[] = {"object", 0};
3331
Guido van Rossumae960af2001-08-30 03:11:59 +00003332 if (type != &PyString_Type)
3333 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003334 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3335 return NULL;
3336 if (x == NULL)
3337 return PyString_FromString("");
3338 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003339}
3340
Guido van Rossumae960af2001-08-30 03:11:59 +00003341static PyObject *
3342str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3343{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003344 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003345 int n;
3346
3347 assert(PyType_IsSubtype(type, &PyString_Type));
3348 tmp = string_new(&PyString_Type, args, kwds);
3349 if (tmp == NULL)
3350 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003351 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003352 n = PyString_GET_SIZE(tmp);
3353 pnew = type->tp_alloc(type, n);
3354 if (pnew != NULL) {
3355 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003356 ((PyStringObject *)pnew)->ob_shash =
3357 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003358 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003359 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003360 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003361 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003362}
3363
Guido van Rossumcacfc072002-05-24 19:01:59 +00003364static PyObject *
3365basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3366{
3367 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003368 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003369 return NULL;
3370}
3371
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003372static PyObject *
3373string_mod(PyObject *v, PyObject *w)
3374{
3375 if (!PyString_Check(v)) {
3376 Py_INCREF(Py_NotImplemented);
3377 return Py_NotImplemented;
3378 }
3379 return PyString_Format(v, w);
3380}
3381
/* Docstring for the basestring type (used as tp_doc of PyBaseString_Type). */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003384
/* Number-protocol table for str.  Only nb_remainder is filled in, so that
   the `%' operator performs string formatting; the slots not listed after
   it are left to C's implicit zero-initialization. */
static PyNumberMethods string_as_number = {
	0,			/*nb_add*/
	0,			/*nb_subtract*/
	0,			/*nb_multiply*/
	0, 			/*nb_divide*/
	string_mod,		/*nb_remainder*/
};
3392
3393
/* Type object for basestring.  It provides only a docstring and a tp_new
   that always raises TypeError (basestring_new); every other slot is 0. */
PyTypeObject PyBaseString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"basestring",				/* tp_name */
	0,					/* tp_basicsize */
	0,					/* tp_itemsize */
	0,			 		/* tp_dealloc */
	0,			 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	0,		 			/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,		 			/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	0,					/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
	basestring_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	0,					/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	0,					/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseObject_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	basestring_new,				/* tp_new */
	0,		                	/* tp_free */
};
3436
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  Instances are variable-sized: a PyStringObject
   header followed by one char per item.  The type derives from
   PyBaseString_Type and sets Py_TPFLAGS_CHECKTYPES alongside its
   number-protocol table (string_as_number). */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"str",					/* tp_name */
	sizeof(PyStringObject),			/* tp_basicsize */
	sizeof(char),				/* tp_itemsize */
 	(destructor)string_dealloc, 		/* tp_dealloc */
	(printfunc)string_print, 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	(reprfunc)string_repr, 			/* tp_repr */
	&string_as_number,			/* tp_as_number */
	&string_as_sequence,			/* tp_as_sequence */
	&string_as_mapping,			/* tp_as_mapping */
	(hashfunc)string_hash, 			/* tp_hash */
	0,					/* tp_call */
	(reprfunc)string_str,			/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	&string_as_buffer,			/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
		Py_TPFLAGS_BASETYPE,		/* tp_flags */
	string_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	(richcmpfunc)string_richcompare,	/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	string_methods,				/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseString_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	string_new,				/* tp_new */
	PyObject_Del,	                	/* tp_free */
};
3486
3487void
Fred Drakeba096332000-07-09 07:04:36 +00003488PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003489{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003490 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003491 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003492 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003493 if (w == NULL || !PyString_Check(*pv)) {
3494 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003495 *pv = NULL;
3496 return;
3497 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003498 v = string_concat((PyStringObject *) *pv, w);
3499 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003500 *pv = v;
3501}
3502
Guido van Rossum013142a1994-08-30 08:19:36 +00003503void
Fred Drakeba096332000-07-09 07:04:36 +00003504PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003505{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003506 PyString_Concat(pv, w);
3507 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003508}
3509
3510
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003511/* The following function breaks the notion that strings are immutable:
3512 it changes the size of a string. We get away with this only if there
3513 is only one module referencing the object. You can also think of it
3514 as creating a new string object and destroying the old one, only
3515 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003516 already be known to some other part of the code...
3517 Note that if there's not enough memory to resize the string, the original
3518 string object at *pv is deallocated, *pv is set to NULL, an "out of
3519 memory" exception is set, and -1 is returned. Else (on success) 0 is
3520 returned, and the value in *pv may or may not be the same as on input.
3521 As always, an extra byte is allocated for a trailing \0 byte (newsize
3522 does *not* include that), and a trailing \0 byte is stored.
3523*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003524
3525int
Fred Drakeba096332000-07-09 07:04:36 +00003526_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003527{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003528 register PyObject *v;
3529 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003530 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003531 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3532 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003533 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003534 Py_DECREF(v);
3535 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003536 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003537 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003538 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003539 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003540 _Py_ForgetReference(v);
3541 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003542 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003543 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003544 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003545 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003546 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003547 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003548 _Py_NewReference(*pv);
3549 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003550 sv->ob_size = newsize;
3551 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003552 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003553 return 0;
3554}
Guido van Rossume5372401993-03-16 12:15:04 +00003555
3556/* Helpers for formatstring */
3557
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003558static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003559getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003560{
3561 int argidx = *p_argidx;
3562 if (argidx < arglen) {
3563 (*p_argidx)++;
3564 if (arglen < 0)
3565 return args;
3566 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003567 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003568 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003569 PyErr_SetString(PyExc_TypeError,
3570 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003571 return NULL;
3572}
3573
/* Bit flags recording which format flag characters were seen:
 *	F_LJUST	'-'
 *	F_SIGN	'+'
 *	F_BLANK ' '
 *	F_ALT	'#'
 *	F_ZERO	'0'
 * Each flag occupies its own bit so they can be OR-ed together.
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
3586
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003587static int
Fred Drakeba096332000-07-09 07:04:36 +00003588formatfloat(char *buf, size_t buflen, int flags,
3589 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003590{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003591 /* fmt = '%#.' + `prec` + `type`
3592 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003593 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003594 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003595 x = PyFloat_AsDouble(v);
3596 if (x == -1.0 && PyErr_Occurred()) {
3597 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003598 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003599 }
Guido van Rossume5372401993-03-16 12:15:04 +00003600 if (prec < 0)
3601 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003602 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3603 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003604 /* Worst case length calc to ensure no buffer overrun:
3605
3606 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003607 fmt = %#.<prec>g
3608 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003609 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003610 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003611
3612 'f' formats:
3613 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3614 len = 1 + 50 + 1 + prec = 52 + prec
3615
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003616 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003617 always given), therefore increase the length by one.
3618
3619 */
3620 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3621 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003622 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003623 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003624 return -1;
3625 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003626 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3627 (flags&F_ALT) ? "#" : "",
3628 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003629 PyOS_ascii_formatd(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003630 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003631}
3632
Tim Peters38fd5b62000-09-21 05:43:11 +00003633/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3634 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3635 * Python's regular ints.
3636 * Return value: a new PyString*, or NULL if error.
3637 * . *pbuf is set to point into it,
3638 * *plen set to the # of chars following that.
3639 * Caller must decref it when done using pbuf.
3640 * The string starting at *pbuf is of the form
3641 * "-"? ("0x" | "0X")? digit+
3642 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003643 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003644 * There will be at least prec digits, zero-filled on the left if
3645 * necessary to get that many.
3646 * val object to be converted
3647 * flags bitmask of format flags; only F_ALT is looked at
3648 * prec minimum number of digits; 0-fill on left if needed
3649 * type a character in [duoxX]; u acts the same as d
3650 *
3651 * CAUTION: o, x and X conversions on regular ints can never
3652 * produce a '-' sign, but can for Python's unbounded ints.
3653 */
3654PyObject*
3655_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3656 char **pbuf, int *plen)
3657{
3658 PyObject *result = NULL;
3659 char *buf;
3660 int i;
3661 int sign; /* 1 if '-', else 0 */
3662 int len; /* number of characters */
3663 int numdigits; /* len == numnondigits + numdigits */
3664 int numnondigits = 0;
3665
3666 switch (type) {
3667 case 'd':
3668 case 'u':
3669 result = val->ob_type->tp_str(val);
3670 break;
3671 case 'o':
3672 result = val->ob_type->tp_as_number->nb_oct(val);
3673 break;
3674 case 'x':
3675 case 'X':
3676 numnondigits = 2;
3677 result = val->ob_type->tp_as_number->nb_hex(val);
3678 break;
3679 default:
3680 assert(!"'type' not in [duoxX]");
3681 }
3682 if (!result)
3683 return NULL;
3684
3685 /* To modify the string in-place, there can only be one reference. */
3686 if (result->ob_refcnt != 1) {
3687 PyErr_BadInternalCall();
3688 return NULL;
3689 }
3690 buf = PyString_AsString(result);
3691 len = PyString_Size(result);
3692 if (buf[len-1] == 'L') {
3693 --len;
3694 buf[len] = '\0';
3695 }
3696 sign = buf[0] == '-';
3697 numnondigits += sign;
3698 numdigits = len - numnondigits;
3699 assert(numdigits > 0);
3700
Tim Petersfff53252001-04-12 18:38:48 +00003701 /* Get rid of base marker unless F_ALT */
3702 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003703 /* Need to skip 0x, 0X or 0. */
3704 int skipped = 0;
3705 switch (type) {
3706 case 'o':
3707 assert(buf[sign] == '0');
3708 /* If 0 is only digit, leave it alone. */
3709 if (numdigits > 1) {
3710 skipped = 1;
3711 --numdigits;
3712 }
3713 break;
3714 case 'x':
3715 case 'X':
3716 assert(buf[sign] == '0');
3717 assert(buf[sign + 1] == 'x');
3718 skipped = 2;
3719 numnondigits -= 2;
3720 break;
3721 }
3722 if (skipped) {
3723 buf += skipped;
3724 len -= skipped;
3725 if (sign)
3726 buf[0] = '-';
3727 }
3728 assert(len == numnondigits + numdigits);
3729 assert(numdigits > 0);
3730 }
3731
3732 /* Fill with leading zeroes to meet minimum width. */
3733 if (prec > numdigits) {
3734 PyObject *r1 = PyString_FromStringAndSize(NULL,
3735 numnondigits + prec);
3736 char *b1;
3737 if (!r1) {
3738 Py_DECREF(result);
3739 return NULL;
3740 }
3741 b1 = PyString_AS_STRING(r1);
3742 for (i = 0; i < numnondigits; ++i)
3743 *b1++ = *buf++;
3744 for (i = 0; i < prec - numdigits; i++)
3745 *b1++ = '0';
3746 for (i = 0; i < numdigits; i++)
3747 *b1++ = *buf++;
3748 *b1 = '\0';
3749 Py_DECREF(result);
3750 result = r1;
3751 buf = PyString_AS_STRING(result);
3752 len = numnondigits + prec;
3753 }
3754
3755 /* Fix up case for hex conversions. */
3756 switch (type) {
3757 case 'x':
3758 /* Need to convert all upper case letters to lower case. */
3759 for (i = 0; i < len; i++)
3760 if (buf[i] >= 'A' && buf[i] <= 'F')
3761 buf[i] += 'a'-'A';
3762 break;
3763 case 'X':
3764 /* Need to convert 0x to 0X (and -0x to -0X). */
3765 if (buf[sign + 1] == 'x')
3766 buf[sign + 1] = 'X';
3767 break;
3768 }
3769 *pbuf = buf;
3770 *plen = len;
3771 return result;
3772}
3773
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003774static int
Fred Drakeba096332000-07-09 07:04:36 +00003775formatint(char *buf, size_t buflen, int flags,
3776 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003777{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003778 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003779 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3780 + 1 + 1 = 24 */
3781 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003782 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003783 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003784
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003785 x = PyInt_AsLong(v);
3786 if (x == -1 && PyErr_Occurred()) {
3787 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003788 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003789 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003790 if (x < 0 && type == 'u') {
3791 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003792 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003793 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3794 sign = "-";
3795 else
3796 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003797 if (prec < 0)
3798 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003799
3800 if ((flags & F_ALT) &&
3801 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003802 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003803 * of issues that cause pain:
3804 * - when 0 is being converted, the C standard leaves off
3805 * the '0x' or '0X', which is inconsistent with other
3806 * %#x/%#X conversions and inconsistent with Python's
3807 * hex() function
3808 * - there are platforms that violate the standard and
3809 * convert 0 with the '0x' or '0X'
3810 * (Metrowerks, Compaq Tru64)
3811 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003812 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003813 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003814 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003815 * We can achieve the desired consistency by inserting our
3816 * own '0x' or '0X' prefix, and substituting %x/%X in place
3817 * of %#x/%#X.
3818 *
3819 * Note that this is the same approach as used in
3820 * formatint() in unicodeobject.c
3821 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003822 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3823 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003824 }
3825 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003826 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3827 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003828 prec, type);
3829 }
3830
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003831 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3832 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003833 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003834 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003835 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003836 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003837 return -1;
3838 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003839 if (sign[0])
3840 PyOS_snprintf(buf, buflen, fmt, -x);
3841 else
3842 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003843 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003844}
3845
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003846static int
Fred Drakeba096332000-07-09 07:04:36 +00003847formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003848{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003849 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003850 if (PyString_Check(v)) {
3851 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003852 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003853 }
3854 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003855 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003856 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003857 }
3858 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003859 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003860}
3861
Guido van Rossum013142a1994-08-30 08:19:36 +00003862
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003863/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3864
3865 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3866 chars are formatted. XXX This is a magic number. Each formatting
3867 routine does bounds checking to ensure no overflow, but a better
3868 solution may be to malloc a buffer of appropriate size for each
3869 format. For now, the current solution is sufficient.
3870*/
3871#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003872
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003873PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003874PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003875{
3876 char *fmt, *res;
3877 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003878 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003879 PyObject *result, *orig_args;
3880#ifdef Py_USING_UNICODE
3881 PyObject *v, *w;
3882#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003883 PyObject *dict = NULL;
3884 if (format == NULL || !PyString_Check(format) || args == NULL) {
3885 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003886 return NULL;
3887 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003888 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003889 fmt = PyString_AS_STRING(format);
3890 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003891 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003892 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003893 if (result == NULL)
3894 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003895 res = PyString_AsString(result);
3896 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003897 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003898 argidx = 0;
3899 }
3900 else {
3901 arglen = -1;
3902 argidx = -2;
3903 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003904 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3905 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003906 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003907 while (--fmtcnt >= 0) {
3908 if (*fmt != '%') {
3909 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003910 rescnt = fmtcnt + 100;
3911 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003912 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003913 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003914 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003915 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003916 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003917 }
3918 *res++ = *fmt++;
3919 }
3920 else {
3921 /* Got a format specifier */
3922 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003923 int width = -1;
3924 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003925 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003926 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003927 PyObject *v = NULL;
3928 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003929 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003930 int sign;
3931 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003932 char formatbuf[FORMATBUFLEN];
3933 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003934#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003935 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003936 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003937#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003938
Guido van Rossumda9c2711996-12-05 21:58:58 +00003939 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003940 if (*fmt == '(') {
3941 char *keystart;
3942 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003943 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003944 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003945
3946 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003947 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003948 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003949 goto error;
3950 }
3951 ++fmt;
3952 --fmtcnt;
3953 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003954 /* Skip over balanced parentheses */
3955 while (pcount > 0 && --fmtcnt >= 0) {
3956 if (*fmt == ')')
3957 --pcount;
3958 else if (*fmt == '(')
3959 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003960 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003961 }
3962 keylen = fmt - keystart - 1;
3963 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003964 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003965 "incomplete format key");
3966 goto error;
3967 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003968 key = PyString_FromStringAndSize(keystart,
3969 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003970 if (key == NULL)
3971 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003972 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003973 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003974 args_owned = 0;
3975 }
3976 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003977 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003978 if (args == NULL) {
3979 goto error;
3980 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003981 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003982 arglen = -1;
3983 argidx = -2;
3984 }
Guido van Rossume5372401993-03-16 12:15:04 +00003985 while (--fmtcnt >= 0) {
3986 switch (c = *fmt++) {
3987 case '-': flags |= F_LJUST; continue;
3988 case '+': flags |= F_SIGN; continue;
3989 case ' ': flags |= F_BLANK; continue;
3990 case '#': flags |= F_ALT; continue;
3991 case '0': flags |= F_ZERO; continue;
3992 }
3993 break;
3994 }
3995 if (c == '*') {
3996 v = getnextarg(args, arglen, &argidx);
3997 if (v == NULL)
3998 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003999 if (!PyInt_Check(v)) {
4000 PyErr_SetString(PyExc_TypeError,
4001 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004002 goto error;
4003 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004004 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004005 if (width < 0) {
4006 flags |= F_LJUST;
4007 width = -width;
4008 }
Guido van Rossume5372401993-03-16 12:15:04 +00004009 if (--fmtcnt >= 0)
4010 c = *fmt++;
4011 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004012 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004013 width = c - '0';
4014 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004015 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004016 if (!isdigit(c))
4017 break;
4018 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004019 PyErr_SetString(
4020 PyExc_ValueError,
4021 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004022 goto error;
4023 }
4024 width = width*10 + (c - '0');
4025 }
4026 }
4027 if (c == '.') {
4028 prec = 0;
4029 if (--fmtcnt >= 0)
4030 c = *fmt++;
4031 if (c == '*') {
4032 v = getnextarg(args, arglen, &argidx);
4033 if (v == NULL)
4034 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004035 if (!PyInt_Check(v)) {
4036 PyErr_SetString(
4037 PyExc_TypeError,
4038 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004039 goto error;
4040 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004041 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004042 if (prec < 0)
4043 prec = 0;
4044 if (--fmtcnt >= 0)
4045 c = *fmt++;
4046 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004047 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004048 prec = c - '0';
4049 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004050 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004051 if (!isdigit(c))
4052 break;
4053 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004054 PyErr_SetString(
4055 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004056 "prec too big");
4057 goto error;
4058 }
4059 prec = prec*10 + (c - '0');
4060 }
4061 }
4062 } /* prec */
4063 if (fmtcnt >= 0) {
4064 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004065 if (--fmtcnt >= 0)
4066 c = *fmt++;
4067 }
4068 }
4069 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004070 PyErr_SetString(PyExc_ValueError,
4071 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004072 goto error;
4073 }
4074 if (c != '%') {
4075 v = getnextarg(args, arglen, &argidx);
4076 if (v == NULL)
4077 goto error;
4078 }
4079 sign = 0;
4080 fill = ' ';
4081 switch (c) {
4082 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004083 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004084 len = 1;
4085 break;
4086 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004087#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004088 if (PyUnicode_Check(v)) {
4089 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004090 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004091 goto unicode;
4092 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004093#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004094 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004095 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004096 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004097 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004098 else
4099 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004100 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004101 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004102 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004103 /* XXX Note: this should never happen,
4104 since PyObject_Repr() and
4105 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004106 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004107 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004108 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004109 goto error;
4110 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004111 pbuf = PyString_AS_STRING(temp);
4112 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004113 if (prec >= 0 && len > prec)
4114 len = prec;
4115 break;
4116 case 'i':
4117 case 'd':
4118 case 'u':
4119 case 'o':
4120 case 'x':
4121 case 'X':
4122 if (c == 'i')
4123 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004124 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004125 temp = _PyString_FormatLong(v, flags,
4126 prec, c, &pbuf, &len);
4127 if (!temp)
4128 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004129 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004130 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004131 else {
4132 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004133 len = formatint(pbuf,
4134 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004135 flags, prec, c, v);
4136 if (len < 0)
4137 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004138 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004139 }
4140 if (flags & F_ZERO)
4141 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004142 break;
4143 case 'e':
4144 case 'E':
4145 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004146 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004147 case 'g':
4148 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004149 if (c == 'F')
4150 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004151 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004152 len = formatfloat(pbuf, sizeof(formatbuf),
4153 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004154 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004155 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004156 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004157 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004158 fill = '0';
4159 break;
4160 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004161#ifdef Py_USING_UNICODE
4162 if (PyUnicode_Check(v)) {
4163 fmt = fmt_start;
4164 argidx = argidx_start;
4165 goto unicode;
4166 }
4167#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004168 pbuf = formatbuf;
4169 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004170 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004171 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004172 break;
4173 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004174 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004175 "unsupported format character '%c' (0x%x) "
4176 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004177 c, c,
4178 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004179 goto error;
4180 }
4181 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004182 if (*pbuf == '-' || *pbuf == '+') {
4183 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004184 len--;
4185 }
4186 else if (flags & F_SIGN)
4187 sign = '+';
4188 else if (flags & F_BLANK)
4189 sign = ' ';
4190 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004191 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004192 }
4193 if (width < len)
4194 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004195 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004196 reslen -= rescnt;
4197 rescnt = width + fmtcnt + 100;
4198 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004199 if (reslen < 0) {
4200 Py_DECREF(result);
4201 return PyErr_NoMemory();
4202 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004203 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004204 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004205 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004206 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004207 }
4208 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004209 if (fill != ' ')
4210 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004211 rescnt--;
4212 if (width > len)
4213 width--;
4214 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004215 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4216 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004217 assert(pbuf[1] == c);
4218 if (fill != ' ') {
4219 *res++ = *pbuf++;
4220 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004221 }
Tim Petersfff53252001-04-12 18:38:48 +00004222 rescnt -= 2;
4223 width -= 2;
4224 if (width < 0)
4225 width = 0;
4226 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004227 }
4228 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004229 do {
4230 --rescnt;
4231 *res++ = fill;
4232 } while (--width > len);
4233 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004234 if (fill == ' ') {
4235 if (sign)
4236 *res++ = sign;
4237 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004238 (c == 'x' || c == 'X')) {
4239 assert(pbuf[0] == '0');
4240 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004241 *res++ = *pbuf++;
4242 *res++ = *pbuf++;
4243 }
4244 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004245 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004246 res += len;
4247 rescnt -= len;
4248 while (--width >= len) {
4249 --rescnt;
4250 *res++ = ' ';
4251 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004252 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004253 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004254 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004255 goto error;
4256 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004257 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004258 } /* '%' */
4259 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004260 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004261 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004262 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004263 goto error;
4264 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004265 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004266 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004267 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004268 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004269 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004270
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004271#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004272 unicode:
4273 if (args_owned) {
4274 Py_DECREF(args);
4275 args_owned = 0;
4276 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004277 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004278 if (PyTuple_Check(orig_args) && argidx > 0) {
4279 PyObject *v;
4280 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4281 v = PyTuple_New(n);
4282 if (v == NULL)
4283 goto error;
4284 while (--n >= 0) {
4285 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4286 Py_INCREF(w);
4287 PyTuple_SET_ITEM(v, n, w);
4288 }
4289 args = v;
4290 } else {
4291 Py_INCREF(orig_args);
4292 args = orig_args;
4293 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004294 args_owned = 1;
4295 /* Take what we have of the result and let the Unicode formatting
4296 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004297 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004298 if (_PyString_Resize(&result, rescnt))
4299 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004300 fmtcnt = PyString_GET_SIZE(format) - \
4301 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004302 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4303 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004304 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004305 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004306 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004307 if (v == NULL)
4308 goto error;
4309 /* Paste what we have (result) to what the Unicode formatting
4310 function returned (v) and return the result (or error) */
4311 w = PyUnicode_Concat(result, v);
4312 Py_DECREF(result);
4313 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004314 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004315 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004316#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004317
Guido van Rossume5372401993-03-16 12:15:04 +00004318 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004319 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004320 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004321 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004322 }
Guido van Rossume5372401993-03-16 12:15:04 +00004323 return NULL;
4324}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004325
Guido van Rossum2a61e741997-01-18 07:55:05 +00004326void
Fred Drakeba096332000-07-09 07:04:36 +00004327PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004328{
4329 register PyStringObject *s = (PyStringObject *)(*p);
4330 PyObject *t;
4331 if (s == NULL || !PyString_Check(s))
4332 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004333 /* If it's a string subclass, we don't really know what putting
4334 it in the interned dict might do. */
4335 if (!PyString_CheckExact(s))
4336 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004337 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004338 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004339 if (interned == NULL) {
4340 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004341 if (interned == NULL) {
4342 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004343 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004344 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004345 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004346 t = PyDict_GetItem(interned, (PyObject *)s);
4347 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004348 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004349 Py_DECREF(*p);
4350 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004351 return;
4352 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004353
Armin Rigo79f7ad22004-08-07 19:27:39 +00004354 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004355 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004356 return;
4357 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004358 /* The two references in interned are not counted by refcnt.
4359 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004360 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004361 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004362}
4363
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004364void
4365PyString_InternImmortal(PyObject **p)
4366{
4367 PyString_InternInPlace(p);
4368 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4369 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4370 Py_INCREF(*p);
4371 }
4372}
4373
Guido van Rossum2a61e741997-01-18 07:55:05 +00004374
4375PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004376PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004377{
4378 PyObject *s = PyString_FromString(cp);
4379 if (s == NULL)
4380 return NULL;
4381 PyString_InternInPlace(&s);
4382 return s;
4383}
4384
Guido van Rossum8cf04761997-08-02 02:57:45 +00004385void
Fred Drakeba096332000-07-09 07:04:36 +00004386PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004387{
4388 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004389 for (i = 0; i < UCHAR_MAX + 1; i++) {
4390 Py_XDECREF(characters[i]);
4391 characters[i] = NULL;
4392 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004393 Py_XDECREF(nullstring);
4394 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004395}
Barry Warsawa903ad982001-02-23 16:40:48 +00004396
Barry Warsawa903ad982001-02-23 16:40:48 +00004397void _Py_ReleaseInternedStrings(void)
4398{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004399 PyObject *keys;
4400 PyStringObject *s;
4401 int i, n;
4402
4403 if (interned == NULL || !PyDict_Check(interned))
4404 return;
4405 keys = PyDict_Keys(interned);
4406 if (keys == NULL || !PyList_Check(keys)) {
4407 PyErr_Clear();
4408 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004409 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004410
4411 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4412 detector, interned strings are not forcibly deallocated; rather, we
4413 give them their stolen references back, and then clear and DECREF
4414 the interned dict. */
4415
4416 fprintf(stderr, "releasing interned strings\n");
4417 n = PyList_GET_SIZE(keys);
4418 for (i = 0; i < n; i++) {
4419 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4420 switch (s->ob_sstate) {
4421 case SSTATE_NOT_INTERNED:
4422 /* XXX Shouldn't happen */
4423 break;
4424 case SSTATE_INTERNED_IMMORTAL:
4425 s->ob_refcnt += 1;
4426 break;
4427 case SSTATE_INTERNED_MORTAL:
4428 s->ob_refcnt += 2;
4429 break;
4430 default:
4431 Py_FatalError("Inconsistent interned string state.");
4432 }
4433 s->ob_sstate = SSTATE_NOT_INTERNED;
4434 }
4435 Py_DECREF(keys);
4436 PyDict_Clear(interned);
4437 Py_DECREF(interned);
4438 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004439}