blob: 7c3ab094da4907584e8cb9abd49efb15f6a1abd3 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000157 int n = 0;
158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000184
Barry Warsawdadace02001-08-24 18:32:06 +0000185 switch (*f) {
186 case 'c':
187 (void)va_arg(count, int);
188 /* fall through... */
189 case '%':
190 n++;
191 break;
192 case 'd': case 'i': case 'x':
193 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000194 /* 20 bytes is enough to hold a 64-bit
195 integer. Decimal takes the most space.
196 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000197 n += 20;
198 break;
199 case 's':
200 s = va_arg(count, char*);
201 n += strlen(s);
202 break;
203 case 'p':
204 (void) va_arg(count, int);
205 /* maximum 64-bit pointer representation:
206 * 0xffffffffffffffff
207 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000208 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000209 */
210 n += 19;
211 break;
212 default:
213 /* if we stumble upon an unknown
214 formatting code, copy the rest of
215 the format string to the output
216 string. (we cannot just skip the
217 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000218 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 n += strlen(p);
220 goto expand;
221 }
222 } else
223 n++;
224 }
225 expand:
226 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000227 /* Since we've analyzed how much space we need for the worst case,
228 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000229 string = PyString_FromStringAndSize(NULL, n);
230 if (!string)
231 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000232
Barry Warsawdadace02001-08-24 18:32:06 +0000233 s = PyString_AsString(string);
234
235 for (f = format; *f; f++) {
236 if (*f == '%') {
237 const char* p = f++;
238 int i, longflag = 0;
239 /* parse the width.precision part (we're only
240 interested in the precision value, if any) */
241 n = 0;
242 while (isdigit(Py_CHARMASK(*f)))
243 n = (n*10) + *f++ - '0';
244 if (*f == '.') {
245 f++;
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 }
250 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
251 f++;
252 /* handle the long flag, but only for %ld. others
253 can be added when necessary. */
254 if (*f == 'l' && *(f+1) == 'd') {
255 longflag = 1;
256 ++f;
257 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000258
Barry Warsawdadace02001-08-24 18:32:06 +0000259 switch (*f) {
260 case 'c':
261 *s++ = va_arg(vargs, int);
262 break;
263 case 'd':
264 if (longflag)
265 sprintf(s, "%ld", va_arg(vargs, long));
266 else
267 sprintf(s, "%d", va_arg(vargs, int));
268 s += strlen(s);
269 break;
270 case 'i':
271 sprintf(s, "%i", va_arg(vargs, int));
272 s += strlen(s);
273 break;
274 case 'x':
275 sprintf(s, "%x", va_arg(vargs, int));
276 s += strlen(s);
277 break;
278 case 's':
279 p = va_arg(vargs, char*);
280 i = strlen(p);
281 if (n > 0 && i > n)
282 i = n;
283 memcpy(s, p, i);
284 s += i;
285 break;
286 case 'p':
287 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000288 /* %p is ill-defined: ensure leading 0x. */
289 if (s[1] == 'X')
290 s[1] = 'x';
291 else if (s[1] != 'x') {
292 memmove(s+2, s, strlen(s)+1);
293 s[0] = '0';
294 s[1] = 'x';
295 }
Barry Warsawdadace02001-08-24 18:32:06 +0000296 s += strlen(s);
297 break;
298 case '%':
299 *s++ = '%';
300 break;
301 default:
302 strcpy(s, p);
303 s += strlen(s);
304 goto end;
305 }
306 } else
307 *s++ = *f;
308 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000309
Barry Warsawdadace02001-08-24 18:32:06 +0000310 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000311 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000312 return string;
313}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000316PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000317{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000318 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319 va_list vargs;
320
321#ifdef HAVE_STDARG_PROTOTYPES
322 va_start(vargs, format);
323#else
324 va_start(vargs);
325#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000326 ret = PyString_FromFormatV(format, vargs);
327 va_end(vargs);
328 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000329}
330
331
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000332PyObject *PyString_Decode(const char *s,
333 int size,
334 const char *encoding,
335 const char *errors)
336{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000337 PyObject *v, *str;
338
339 str = PyString_FromStringAndSize(s, size);
340 if (str == NULL)
341 return NULL;
342 v = PyString_AsDecodedString(str, encoding, errors);
343 Py_DECREF(str);
344 return v;
345}
346
347PyObject *PyString_AsDecodedObject(PyObject *str,
348 const char *encoding,
349 const char *errors)
350{
351 PyObject *v;
352
353 if (!PyString_Check(str)) {
354 PyErr_BadArgument();
355 goto onError;
356 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000358 if (encoding == NULL) {
359#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000361#else
362 PyErr_SetString(PyExc_ValueError, "no encoding specified");
363 goto onError;
364#endif
365 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000366
367 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000368 v = PyCodec_Decode(str, encoding, errors);
369 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000370 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000371
372 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000373
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000374 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000375 return NULL;
376}
377
378PyObject *PyString_AsDecodedString(PyObject *str,
379 const char *encoding,
380 const char *errors)
381{
382 PyObject *v;
383
384 v = PyString_AsDecodedObject(str, encoding, errors);
385 if (v == NULL)
386 goto onError;
387
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000388#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000389 /* Convert Unicode to a string using the default encoding */
390 if (PyUnicode_Check(v)) {
391 PyObject *temp = v;
392 v = PyUnicode_AsEncodedString(v, NULL, NULL);
393 Py_DECREF(temp);
394 if (v == NULL)
395 goto onError;
396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000397#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 if (!PyString_Check(v)) {
399 PyErr_Format(PyExc_TypeError,
400 "decoder did not return a string object (type=%.400s)",
401 v->ob_type->tp_name);
402 Py_DECREF(v);
403 goto onError;
404 }
405
406 return v;
407
408 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 return NULL;
410}
411
412PyObject *PyString_Encode(const char *s,
413 int size,
414 const char *encoding,
415 const char *errors)
416{
417 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000418
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000419 str = PyString_FromStringAndSize(s, size);
420 if (str == NULL)
421 return NULL;
422 v = PyString_AsEncodedString(str, encoding, errors);
423 Py_DECREF(str);
424 return v;
425}
426
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000427PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000428 const char *encoding,
429 const char *errors)
430{
431 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000432
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 if (!PyString_Check(str)) {
434 PyErr_BadArgument();
435 goto onError;
436 }
437
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000438 if (encoding == NULL) {
439#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000440 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000441#else
442 PyErr_SetString(PyExc_ValueError, "no encoding specified");
443 goto onError;
444#endif
445 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446
447 /* Encode via the codec registry */
448 v = PyCodec_Encode(str, encoding, errors);
449 if (v == NULL)
450 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000451
452 return v;
453
454 onError:
455 return NULL;
456}
457
458PyObject *PyString_AsEncodedString(PyObject *str,
459 const char *encoding,
460 const char *errors)
461{
462 PyObject *v;
463
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000464 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000465 if (v == NULL)
466 goto onError;
467
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000468#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469 /* Convert Unicode to a string using the default encoding */
470 if (PyUnicode_Check(v)) {
471 PyObject *temp = v;
472 v = PyUnicode_AsEncodedString(v, NULL, NULL);
473 Py_DECREF(temp);
474 if (v == NULL)
475 goto onError;
476 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000477#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 if (!PyString_Check(v)) {
479 PyErr_Format(PyExc_TypeError,
480 "encoder did not return a string object (type=%.400s)",
481 v->ob_type->tp_name);
482 Py_DECREF(v);
483 goto onError;
484 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000485
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000486 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000487
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000488 onError:
489 return NULL;
490}
491
Guido van Rossum234f9421993-06-17 12:35:49 +0000492static void
Fred Drakeba096332000-07-09 07:04:36 +0000493string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000494{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000495 switch (PyString_CHECK_INTERNED(op)) {
496 case SSTATE_NOT_INTERNED:
497 break;
498
499 case SSTATE_INTERNED_MORTAL:
500 /* revive dead object temporarily for DelItem */
501 op->ob_refcnt = 3;
502 if (PyDict_DelItem(interned, op) != 0)
503 Py_FatalError(
504 "deletion of interned string failed");
505 break;
506
507 case SSTATE_INTERNED_IMMORTAL:
508 Py_FatalError("Immortal interned string died.");
509
510 default:
511 Py_FatalError("Inconsistent interned string state.");
512 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000513 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000514}
515
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000516/* Unescape a backslash-escaped string. If unicode is non-zero,
517 the string is a u-literal. If recode_encoding is non-zero,
518 the string is UTF-8 encoded and should be re-encoded in the
519 specified encoding. */
520
521PyObject *PyString_DecodeEscape(const char *s,
522 int len,
523 const char *errors,
524 int unicode,
525 const char *recode_encoding)
526{
527 int c;
528 char *p, *buf;
529 const char *end;
530 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000531 int newlen = recode_encoding ? 4*len:len;
532 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000533 if (v == NULL)
534 return NULL;
535 p = buf = PyString_AsString(v);
536 end = s + len;
537 while (s < end) {
538 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000539 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000540#ifdef Py_USING_UNICODE
541 if (recode_encoding && (*s & 0x80)) {
542 PyObject *u, *w;
543 char *r;
544 const char* t;
545 int rn;
546 t = s;
547 /* Decode non-ASCII bytes as UTF-8. */
548 while (t < end && (*t & 0x80)) t++;
549 u = PyUnicode_DecodeUTF8(s, t - s, errors);
550 if(!u) goto failed;
551
552 /* Recode them in target encoding. */
553 w = PyUnicode_AsEncodedString(
554 u, recode_encoding, errors);
555 Py_DECREF(u);
556 if (!w) goto failed;
557
558 /* Append bytes to output buffer. */
559 r = PyString_AsString(w);
560 rn = PyString_Size(w);
561 memcpy(p, r, rn);
562 p += rn;
563 Py_DECREF(w);
564 s = t;
565 } else {
566 *p++ = *s++;
567 }
568#else
569 *p++ = *s++;
570#endif
571 continue;
572 }
573 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000574 if (s==end) {
575 PyErr_SetString(PyExc_ValueError,
576 "Trailing \\ in string");
577 goto failed;
578 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000579 switch (*s++) {
580 /* XXX This assumes ASCII! */
581 case '\n': break;
582 case '\\': *p++ = '\\'; break;
583 case '\'': *p++ = '\''; break;
584 case '\"': *p++ = '\"'; break;
585 case 'b': *p++ = '\b'; break;
586 case 'f': *p++ = '\014'; break; /* FF */
587 case 't': *p++ = '\t'; break;
588 case 'n': *p++ = '\n'; break;
589 case 'r': *p++ = '\r'; break;
590 case 'v': *p++ = '\013'; break; /* VT */
591 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
592 case '0': case '1': case '2': case '3':
593 case '4': case '5': case '6': case '7':
594 c = s[-1] - '0';
595 if ('0' <= *s && *s <= '7') {
596 c = (c<<3) + *s++ - '0';
597 if ('0' <= *s && *s <= '7')
598 c = (c<<3) + *s++ - '0';
599 }
600 *p++ = c;
601 break;
602 case 'x':
603 if (isxdigit(Py_CHARMASK(s[0]))
604 && isxdigit(Py_CHARMASK(s[1]))) {
605 unsigned int x = 0;
606 c = Py_CHARMASK(*s);
607 s++;
608 if (isdigit(c))
609 x = c - '0';
610 else if (islower(c))
611 x = 10 + c - 'a';
612 else
613 x = 10 + c - 'A';
614 x = x << 4;
615 c = Py_CHARMASK(*s);
616 s++;
617 if (isdigit(c))
618 x += c - '0';
619 else if (islower(c))
620 x += 10 + c - 'a';
621 else
622 x += 10 + c - 'A';
623 *p++ = x;
624 break;
625 }
626 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000627 PyErr_SetString(PyExc_ValueError,
628 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000629 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000630 }
631 if (strcmp(errors, "replace") == 0) {
632 *p++ = '?';
633 } else if (strcmp(errors, "ignore") == 0)
634 /* do nothing */;
635 else {
636 PyErr_Format(PyExc_ValueError,
637 "decoding error; "
638 "unknown error handling code: %.400s",
639 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000640 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000641 }
642#ifndef Py_USING_UNICODE
643 case 'u':
644 case 'U':
645 case 'N':
646 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000647 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000648 "Unicode escapes not legal "
649 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000650 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000651 }
652#endif
653 default:
654 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000655 s--;
656 goto non_esc; /* an arbitry number of unescaped
657 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000658 }
659 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000660 if (p-buf < newlen)
661 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 return v;
663 failed:
664 Py_DECREF(v);
665 return NULL;
666}
667
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000668static int
669string_getsize(register PyObject *op)
670{
671 char *s;
672 int len;
673 if (PyString_AsStringAndSize(op, &s, &len))
674 return -1;
675 return len;
676}
677
678static /*const*/ char *
679string_getbuffer(register PyObject *op)
680{
681 char *s;
682 int len;
683 if (PyString_AsStringAndSize(op, &s, &len))
684 return NULL;
685 return s;
686}
687
Guido van Rossumd7047b31995-01-02 19:07:15 +0000688int
Fred Drakeba096332000-07-09 07:04:36 +0000689PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000690{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000691 if (!PyString_Check(op))
692 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000694}
695
696/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000697PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000698{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000699 if (!PyString_Check(op))
700 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000702}
703
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704int
705PyString_AsStringAndSize(register PyObject *obj,
706 register char **s,
707 register int *len)
708{
709 if (s == NULL) {
710 PyErr_BadInternalCall();
711 return -1;
712 }
713
714 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000715#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000716 if (PyUnicode_Check(obj)) {
717 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
718 if (obj == NULL)
719 return -1;
720 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000721 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000722#endif
723 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000724 PyErr_Format(PyExc_TypeError,
725 "expected string or Unicode object, "
726 "%.200s found", obj->ob_type->tp_name);
727 return -1;
728 }
729 }
730
731 *s = PyString_AS_STRING(obj);
732 if (len != NULL)
733 *len = PyString_GET_SIZE(obj);
734 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
735 PyErr_SetString(PyExc_TypeError,
736 "expected string without null bytes");
737 return -1;
738 }
739 return 0;
740}
741
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742/* Methods */
743
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000744static int
Fred Drakeba096332000-07-09 07:04:36 +0000745string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746{
747 int i;
748 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000749 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000750
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000751 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000752 if (! PyString_CheckExact(op)) {
753 int ret;
754 /* A str subclass may have its own __str__ method. */
755 op = (PyStringObject *) PyObject_Str((PyObject *)op);
756 if (op == NULL)
757 return -1;
758 ret = string_print(op, fp, flags);
759 Py_DECREF(op);
760 return ret;
761 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000762 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000763#ifdef __VMS
764 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
765#else
766 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
767#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000768 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000770
Thomas Wouters7e474022000-07-16 12:04:32 +0000771 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000772 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000773 if (memchr(op->ob_sval, '\'', op->ob_size) &&
774 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000775 quote = '"';
776
777 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000778 for (i = 0; i < op->ob_size; i++) {
779 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000780 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000781 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000782 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000783 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000784 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000785 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000786 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000787 fprintf(fp, "\\r");
788 else if (c < ' ' || c >= 0x7f)
789 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000790 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000791 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000794 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000795}
796
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000797PyObject *
798PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000800 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000801 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000802 PyObject *v;
803 if (newsize > INT_MAX) {
804 PyErr_SetString(PyExc_OverflowError,
805 "string is too large to make repr");
806 }
807 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000809 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000810 }
811 else {
812 register int i;
813 register char c;
814 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815 int quote;
816
Thomas Wouters7e474022000-07-16 12:04:32 +0000817 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000818 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000819 if (smartquotes &&
820 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000821 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000822 quote = '"';
823
Tim Peters9161c8b2001-12-03 01:55:38 +0000824 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000825 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000827 /* There's at least enough room for a hex escape
828 and a closing quote. */
829 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000833 else if (c == '\t')
834 *p++ = '\\', *p++ = 't';
835 else if (c == '\n')
836 *p++ = '\\', *p++ = 'n';
837 else if (c == '\r')
838 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 else if (c < ' ' || c >= 0x7f) {
840 /* For performance, we don't want to call
841 PyOS_snprintf here (extra layers of
842 function call). */
843 sprintf(p, "\\x%02x", c & 0xff);
844 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000845 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 else
847 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000849 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000850 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000852 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000853 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000854 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856}
857
Guido van Rossum189f1df2001-05-01 16:51:53 +0000858static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000859string_repr(PyObject *op)
860{
861 return PyString_Repr(op, 1);
862}
863
864static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000865string_str(PyObject *s)
866{
Tim Petersc9933152001-10-16 20:18:24 +0000867 assert(PyString_Check(s));
868 if (PyString_CheckExact(s)) {
869 Py_INCREF(s);
870 return s;
871 }
872 else {
873 /* Subtype -- return genuine string with the same value. */
874 PyStringObject *t = (PyStringObject *) s;
875 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
876 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000877}
878
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879static int
Fred Drakeba096332000-07-09 07:04:36 +0000880string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000881{
882 return a->ob_size;
883}
884
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000885static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000886string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000887{
888 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000889 register PyStringObject *op;
890 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000891#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000892 if (PyUnicode_Check(bb))
893 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000894#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000895 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000896 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000897 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898 return NULL;
899 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000900#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000902 if ((a->ob_size == 0 || b->ob_size == 0) &&
903 PyString_CheckExact(a) && PyString_CheckExact(b)) {
904 if (a->ob_size == 0) {
905 Py_INCREF(bb);
906 return bb;
907 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 Py_INCREF(a);
909 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000910 }
911 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000912 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000913 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000914 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000915 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000916 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000917 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000918 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000919 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
920 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
921 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923#undef b
924}
925
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000926static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000927string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000928{
929 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000930 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000931 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000933 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934 if (n < 0)
935 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000936 /* watch out for overflows: the size can overflow int,
937 * and the # of bytes needed can overflow size_t
938 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000939 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000940 if (n && size / n != a->ob_size) {
941 PyErr_SetString(PyExc_OverflowError,
942 "repeated string is too long");
943 return NULL;
944 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000945 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 Py_INCREF(a);
947 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 }
Tim Peterse7c05322004-06-27 17:24:49 +0000949 nbytes = (size_t)size;
950 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000951 PyErr_SetString(PyExc_OverflowError,
952 "repeated string is too long");
953 return NULL;
954 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000956 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000957 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000959 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000960 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000961 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000962 op->ob_sval[size] = '\0';
963 if (a->ob_size == 1 && n > 0) {
964 memset(op->ob_sval, a->ob_sval[0] , n);
965 return (PyObject *) op;
966 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000967 i = 0;
968 if (i < size) {
969 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
970 i = (int) a->ob_size;
971 }
972 while (i < size) {
973 j = (i <= size-i) ? i : size-i;
974 memcpy(op->ob_sval+i, op->ob_sval, j);
975 i += j;
976 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000977 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978}
979
980/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
981
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000983string_slice(register PyStringObject *a, register int i, register int j)
984 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000985{
986 if (i < 0)
987 i = 0;
988 if (j < 0)
989 j = 0; /* Avoid signed/unsigned bug in next line */
990 if (j > a->ob_size)
991 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000992 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
993 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 Py_INCREF(a);
995 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 }
997 if (j < i)
998 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001000}
1001
Guido van Rossum9284a572000-03-07 15:53:43 +00001002static int
Fred Drakeba096332000-07-09 07:04:36 +00001003string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001004{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001005 char *s = PyString_AS_STRING(a);
1006 const char *sub = PyString_AS_STRING(el);
1007 char *last;
1008 int len_sub = PyString_GET_SIZE(el);
1009 int shortsub;
1010 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001011
1012 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001013#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014 if (PyUnicode_Check(el))
1015 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001016#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 if (!PyString_Check(el)) {
1018 PyErr_SetString(PyExc_TypeError,
1019 "'in <string>' requires string as left operand");
1020 return -1;
1021 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001022 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001023
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001024 if (len_sub == 0)
1025 return 1;
1026 /* last points to one char beyond the start of the rightmost
1027 substring. When s<last, there is still room for a possible match
1028 and s[0] through s[len_sub-1] will be in bounds.
1029 shortsub is len_sub minus the last character which is checked
1030 separately just before the memcmp(). That check helps prevent
1031 false starts and saves the setup time for memcmp().
1032 */
1033 firstchar = sub[0];
1034 shortsub = len_sub - 1;
1035 lastchar = sub[shortsub];
1036 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1037 while (s < last) {
1038 s = memchr(s, firstchar, last-s);
1039 if (s == NULL)
1040 return 0;
1041 assert(s < last);
1042 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001043 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001044 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001045 }
1046 return 0;
1047}
1048
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001049static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001050string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001052 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001053 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001054 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001056 return NULL;
1057 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001058 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001059 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001060 if (v == NULL)
1061 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001062 else {
1063#ifdef COUNT_ALLOCS
1064 one_strings++;
1065#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001066 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001067 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001068 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001069}
1070
Martin v. Löwiscd353062001-05-24 16:56:35 +00001071static PyObject*
1072string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074 int c;
1075 int len_a, len_b;
1076 int min_len;
1077 PyObject *result;
1078
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001079 /* Make sure both arguments are strings. */
1080 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001081 result = Py_NotImplemented;
1082 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001083 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001084 if (a == b) {
1085 switch (op) {
1086 case Py_EQ:case Py_LE:case Py_GE:
1087 result = Py_True;
1088 goto out;
1089 case Py_NE:case Py_LT:case Py_GT:
1090 result = Py_False;
1091 goto out;
1092 }
1093 }
1094 if (op == Py_EQ) {
1095 /* Supporting Py_NE here as well does not save
1096 much time, since Py_NE is rarely used. */
1097 if (a->ob_size == b->ob_size
1098 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001099 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 a->ob_size) == 0)) {
1101 result = Py_True;
1102 } else {
1103 result = Py_False;
1104 }
1105 goto out;
1106 }
1107 len_a = a->ob_size; len_b = b->ob_size;
1108 min_len = (len_a < len_b) ? len_a : len_b;
1109 if (min_len > 0) {
1110 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1111 if (c==0)
1112 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1113 }else
1114 c = 0;
1115 if (c == 0)
1116 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1117 switch (op) {
1118 case Py_LT: c = c < 0; break;
1119 case Py_LE: c = c <= 0; break;
1120 case Py_EQ: assert(0); break; /* unreachable */
1121 case Py_NE: c = c != 0; break;
1122 case Py_GT: c = c > 0; break;
1123 case Py_GE: c = c >= 0; break;
1124 default:
1125 result = Py_NotImplemented;
1126 goto out;
1127 }
1128 result = c ? Py_True : Py_False;
1129 out:
1130 Py_INCREF(result);
1131 return result;
1132}
1133
1134int
1135_PyString_Eq(PyObject *o1, PyObject *o2)
1136{
1137 PyStringObject *a, *b;
1138 a = (PyStringObject*)o1;
1139 b = (PyStringObject*)o2;
1140 return a->ob_size == b->ob_size
1141 && *a->ob_sval == *b->ob_sval
1142 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001143}
1144
Guido van Rossum9bfef441993-03-29 10:43:31 +00001145static long
Fred Drakeba096332000-07-09 07:04:36 +00001146string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001147{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001148 register int len;
1149 register unsigned char *p;
1150 register long x;
1151
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001152 if (a->ob_shash != -1)
1153 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001154 len = a->ob_size;
1155 p = (unsigned char *) a->ob_sval;
1156 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001157 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001158 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001159 x ^= a->ob_size;
1160 if (x == -1)
1161 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001162 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001163 return x;
1164}
1165
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001166static PyObject*
1167string_subscript(PyStringObject* self, PyObject* item)
1168{
1169 if (PyInt_Check(item)) {
1170 long i = PyInt_AS_LONG(item);
1171 if (i < 0)
1172 i += PyString_GET_SIZE(self);
1173 return string_item(self,i);
1174 }
1175 else if (PyLong_Check(item)) {
1176 long i = PyLong_AsLong(item);
1177 if (i == -1 && PyErr_Occurred())
1178 return NULL;
1179 if (i < 0)
1180 i += PyString_GET_SIZE(self);
1181 return string_item(self,i);
1182 }
1183 else if (PySlice_Check(item)) {
1184 int start, stop, step, slicelength, cur, i;
1185 char* source_buf;
1186 char* result_buf;
1187 PyObject* result;
1188
1189 if (PySlice_GetIndicesEx((PySliceObject*)item,
1190 PyString_GET_SIZE(self),
1191 &start, &stop, &step, &slicelength) < 0) {
1192 return NULL;
1193 }
1194
1195 if (slicelength <= 0) {
1196 return PyString_FromStringAndSize("", 0);
1197 }
1198 else {
1199 source_buf = PyString_AsString((PyObject*)self);
1200 result_buf = PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001201 if (result_buf == NULL)
1202 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001203
1204 for (cur = start, i = 0; i < slicelength;
1205 cur += step, i++) {
1206 result_buf[i] = source_buf[cur];
1207 }
1208
1209 result = PyString_FromStringAndSize(result_buf,
1210 slicelength);
1211 PyMem_Free(result_buf);
1212 return result;
1213 }
1214 }
1215 else {
1216 PyErr_SetString(PyExc_TypeError,
1217 "string indices must be integers");
1218 return NULL;
1219 }
1220}
1221
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001222static int
Fred Drakeba096332000-07-09 07:04:36 +00001223string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001224{
1225 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001226 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001227 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001228 return -1;
1229 }
1230 *ptr = (void *)self->ob_sval;
1231 return self->ob_size;
1232}
1233
1234static int
Fred Drakeba096332000-07-09 07:04:36 +00001235string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236{
Guido van Rossum045e6881997-09-08 18:30:11 +00001237 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001238 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001239 return -1;
1240}
1241
1242static int
Fred Drakeba096332000-07-09 07:04:36 +00001243string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001244{
1245 if ( lenp )
1246 *lenp = self->ob_size;
1247 return 1;
1248}
1249
Guido van Rossum1db70701998-10-08 02:18:52 +00001250static int
Fred Drakeba096332000-07-09 07:04:36 +00001251string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001252{
1253 if ( index != 0 ) {
1254 PyErr_SetString(PyExc_SystemError,
1255 "accessing non-existent string segment");
1256 return -1;
1257 }
1258 *ptr = self->ob_sval;
1259 return self->ob_size;
1260}
1261
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001262static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001263 (inquiry)string_length, /*sq_length*/
1264 (binaryfunc)string_concat, /*sq_concat*/
1265 (intargfunc)string_repeat, /*sq_repeat*/
1266 (intargfunc)string_item, /*sq_item*/
1267 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001268 0, /*sq_ass_item*/
1269 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001270 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001271};
1272
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001273static PyMappingMethods string_as_mapping = {
1274 (inquiry)string_length,
1275 (binaryfunc)string_subscript,
1276 0,
1277};
1278
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001279static PyBufferProcs string_as_buffer = {
1280 (getreadbufferproc)string_buffer_getreadbuf,
1281 (getwritebufferproc)string_buffer_getwritebuf,
1282 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001283 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001284};
1285
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286
1287
/* Selectors for the three strip variants; each value is also the index
   of the matching entry in stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Bare method name for selector i: the stripformat entry with its
   3-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001296
/* Append the substring data[left:right] to `list' as a new string.

   Expects the enclosing function to provide a PyObject *str scratch
   variable, a PyObject *list, and an onError label, which is jumped to
   when creating or appending the string fails.

   Wrapped in do { } while (0) so that the expansion is one statement
   and stays correct under an unbraced if/else; invoke it with a
   trailing semicolon. */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
1308
/* Insert the substring data[left:right] at the front of `list' as a
   new string.

   Expects the enclosing function to provide a PyObject *str scratch
   variable, a PyObject *list, and an onError label, which is jumped to
   when creating or inserting the string fails.

   Wrapped in do { } while (0) so that the expansion is one statement
   and stays correct under an unbraced if/else; invoke it with a
   trailing semicolon. */
#define SPLIT_INSERT(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Insert(list, 0, str)) {                      \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320
1321static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001322split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001324 int i, j;
1325 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001326 PyObject *list = PyList_New(0);
1327
1328 if (list == NULL)
1329 return NULL;
1330
Guido van Rossum4c08d552000-03-10 22:55:18 +00001331 for (i = j = 0; i < len; ) {
1332 while (i < len && isspace(Py_CHARMASK(s[i])))
1333 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 while (i < len && !isspace(Py_CHARMASK(s[i])))
1336 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001338 if (maxsplit-- <= 0)
1339 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001340 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001341 while (i < len && isspace(Py_CHARMASK(s[i])))
1342 i++;
1343 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344 }
1345 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001346 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001347 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001350 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001351 Py_DECREF(list);
1352 return NULL;
1353}
1354
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001355static PyObject *
1356split_char(const char *s, int len, char ch, int maxcount)
1357{
1358 register int i, j;
1359 PyObject *str;
1360 PyObject *list = PyList_New(0);
1361
1362 if (list == NULL)
1363 return NULL;
1364
1365 for (i = j = 0; i < len; ) {
1366 if (s[i] == ch) {
1367 if (maxcount-- <= 0)
1368 break;
1369 SPLIT_APPEND(s, j, i);
1370 i = j = i + 1;
1371 } else
1372 i++;
1373 }
1374 if (j <= len) {
1375 SPLIT_APPEND(s, j, len);
1376 }
1377 return list;
1378
1379 onError:
1380 Py_DECREF(list);
1381 return NULL;
1382}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001383
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001384PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385"S.split([sep [,maxsplit]]) -> list of strings\n\
1386\n\
1387Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001388delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001389splits are done. If sep is not specified or is None, any\n\
1390whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391
1392static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001393string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001394{
1395 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001396 int maxsplit = -1;
1397 const char *s = PyString_AS_STRING(self), *sub;
1398 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001402 if (maxsplit < 0)
1403 maxsplit = INT_MAX;
1404 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001406 if (PyString_Check(subobj)) {
1407 sub = PyString_AS_STRING(subobj);
1408 n = PyString_GET_SIZE(subobj);
1409 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001410#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001411 else if (PyUnicode_Check(subobj))
1412 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001413#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001414 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1415 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001416
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417 if (n == 0) {
1418 PyErr_SetString(PyExc_ValueError, "empty separator");
1419 return NULL;
1420 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421 else if (n == 1)
1422 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423
1424 list = PyList_New(0);
1425 if (list == NULL)
1426 return NULL;
1427
1428 i = j = 0;
1429 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001430 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001431 if (maxsplit-- <= 0)
1432 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001433 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1434 if (item == NULL)
1435 goto fail;
1436 err = PyList_Append(list, item);
1437 Py_DECREF(item);
1438 if (err < 0)
1439 goto fail;
1440 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441 }
1442 else
1443 i++;
1444 }
1445 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1446 if (item == NULL)
1447 goto fail;
1448 err = PyList_Append(list, item);
1449 Py_DECREF(item);
1450 if (err < 0)
1451 goto fail;
1452
1453 return list;
1454
1455 fail:
1456 Py_DECREF(list);
1457 return NULL;
1458}
1459
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001460static PyObject *
1461rsplit_whitespace(const char *s, int len, int maxsplit)
1462{
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001463 int i, j;
1464 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001465 PyObject *list = PyList_New(0);
1466
1467 if (list == NULL)
1468 return NULL;
1469
1470 for (i = j = len - 1; i >= 0; ) {
1471 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1472 i--;
1473 j = i;
1474 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1475 i--;
1476 if (j > i) {
1477 if (maxsplit-- <= 0)
1478 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001479 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001480 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1481 i--;
1482 j = i;
1483 }
1484 }
1485 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001487 }
1488 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001489 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001490 Py_DECREF(list);
1491 return NULL;
1492}
1493
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001494static PyObject *
1495rsplit_char(const char *s, int len, char ch, int maxcount)
1496{
1497 register int i, j;
1498 PyObject *str;
1499 PyObject *list = PyList_New(0);
1500
1501 if (list == NULL)
1502 return NULL;
1503
1504 for (i = j = len - 1; i >= 0; ) {
1505 if (s[i] == ch) {
1506 if (maxcount-- <= 0)
1507 break;
1508 SPLIT_INSERT(s, i + 1, j + 1);
1509 j = i = i - 1;
1510 } else
1511 i--;
1512 }
1513 if (j >= -1) {
1514 SPLIT_INSERT(s, 0, j + 1);
1515 }
1516 return list;
1517
1518 onError:
1519 Py_DECREF(list);
1520 return NULL;
1521}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001522
1523PyDoc_STRVAR(rsplit__doc__,
1524"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1525\n\
1526Return a list of the words in the string S, using sep as the\n\
1527delimiter string, starting at the end of the string and working\n\
1528to the front. If maxsplit is given, at most maxsplit splits are\n\
1529done. If sep is not specified or is None, any whitespace string\n\
1530is a separator.");
1531
1532static PyObject *
1533string_rsplit(PyStringObject *self, PyObject *args)
1534{
1535 int len = PyString_GET_SIZE(self), n, i, j, err;
1536 int maxsplit = -1;
1537 const char *s = PyString_AS_STRING(self), *sub;
1538 PyObject *list, *item, *subobj = Py_None;
1539
1540 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1541 return NULL;
1542 if (maxsplit < 0)
1543 maxsplit = INT_MAX;
1544 if (subobj == Py_None)
1545 return rsplit_whitespace(s, len, maxsplit);
1546 if (PyString_Check(subobj)) {
1547 sub = PyString_AS_STRING(subobj);
1548 n = PyString_GET_SIZE(subobj);
1549 }
1550#ifdef Py_USING_UNICODE
1551 else if (PyUnicode_Check(subobj))
1552 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1553#endif
1554 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1555 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001556
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001557 if (n == 0) {
1558 PyErr_SetString(PyExc_ValueError, "empty separator");
1559 return NULL;
1560 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001561 else if (n == 1)
1562 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001563
1564 list = PyList_New(0);
1565 if (list == NULL)
1566 return NULL;
1567
1568 j = len;
1569 i = j - n;
1570 while (i >= 0) {
1571 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1572 if (maxsplit-- <= 0)
1573 break;
1574 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1575 if (item == NULL)
1576 goto fail;
1577 err = PyList_Insert(list, 0, item);
1578 Py_DECREF(item);
1579 if (err < 0)
1580 goto fail;
1581 j = i;
1582 i -= n;
1583 }
1584 else
1585 i--;
1586 }
1587 item = PyString_FromStringAndSize(s, j);
1588 if (item == NULL)
1589 goto fail;
1590 err = PyList_Insert(list, 0, item);
1591 Py_DECREF(item);
1592 if (err < 0)
1593 goto fail;
1594
1595 return list;
1596
1597 fail:
1598 Py_DECREF(list);
1599 return NULL;
1600}
1601
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001602
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001603PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604"S.join(sequence) -> string\n\
1605\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001607sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608
1609static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001610string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611{
1612 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001613 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 char *p;
1616 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001617 size_t sz = 0;
1618 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001619 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620
Tim Peters19fe14e2001-01-19 03:03:47 +00001621 seq = PySequence_Fast(orig, "");
1622 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001623 return NULL;
1624 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001625
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001626 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001627 if (seqlen == 0) {
1628 Py_DECREF(seq);
1629 return PyString_FromString("");
1630 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001632 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001633 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1634 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001635 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001636 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001637 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001639
Raymond Hettinger674f2412004-08-23 23:23:54 +00001640 /* There are at least two things to join, or else we have a subclass
1641 * of the builtin types in the sequence.
1642 * Do a pre-pass to figure out the total amount of space we'll
1643 * need (sz), see whether any argument is absurd, and defer to
1644 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001645 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001646 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001647 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001648 item = PySequence_Fast_GET_ITEM(seq, i);
1649 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001650#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001651 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001652 /* Defer to Unicode join.
1653 * CAUTION: There's no gurantee that the
1654 * original sequence can be iterated over
1655 * again, so we must pass seq here.
1656 */
1657 PyObject *result;
1658 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001659 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001660 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001661 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001662#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001663 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001664 "sequence item %i: expected string,"
1665 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001666 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001667 Py_DECREF(seq);
1668 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001669 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001670 sz += PyString_GET_SIZE(item);
1671 if (i != 0)
1672 sz += seplen;
1673 if (sz < old_sz || sz > INT_MAX) {
1674 PyErr_SetString(PyExc_OverflowError,
1675 "join() is too long for a Python string");
1676 Py_DECREF(seq);
1677 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001679 }
1680
1681 /* Allocate result space. */
1682 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1683 if (res == NULL) {
1684 Py_DECREF(seq);
1685 return NULL;
1686 }
1687
1688 /* Catenate everything. */
1689 p = PyString_AS_STRING(res);
1690 for (i = 0; i < seqlen; ++i) {
1691 size_t n;
1692 item = PySequence_Fast_GET_ITEM(seq, i);
1693 n = PyString_GET_SIZE(item);
1694 memcpy(p, PyString_AS_STRING(item), n);
1695 p += n;
1696 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001697 memcpy(p, sep, seplen);
1698 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001699 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001701
Jeremy Hylton49048292000-07-11 03:28:17 +00001702 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704}
1705
Tim Peters52e155e2001-06-16 05:42:57 +00001706PyObject *
1707_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001708{
Tim Petersa7259592001-06-16 05:11:17 +00001709 assert(sep != NULL && PyString_Check(sep));
1710 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001711 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001712}
1713
/* Normalize a (start, end) index pair in place for a string of length len.
 *
 * end:   values above len are cut down to len; negative values are taken
 *        relative to the end of the string and floored at 0.
 * start: negative values are taken relative to the end of the string and
 *        floored at 0.  start is not capped at len.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int lo = *start;
	int hi = *end;

	if (hi > len) {
		hi = len;
	}
	else if (hi < 0) {
		hi += len;
	}
	if (hi < 0) {
		hi = 0;
	}

	if (lo < 0) {
		lo += len;
		if (lo < 0) {
			lo = 0;
		}
	}

	*end = hi;
	*start = lo;
}
1728
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729static long
Fred Drakeba096332000-07-09 07:04:36 +00001730string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001732 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733 int len = PyString_GET_SIZE(self);
1734 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001735 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001737 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001738 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001739 return -2;
1740 if (PyString_Check(subobj)) {
1741 sub = PyString_AS_STRING(subobj);
1742 n = PyString_GET_SIZE(subobj);
1743 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001744#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001746 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001747#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749 return -2;
1750
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001751 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753 if (dir > 0) {
1754 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 last -= n;
1757 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001758 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001759 return (long)i;
1760 }
1761 else {
1762 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001763
Guido van Rossum4c08d552000-03-10 22:55:18 +00001764 if (n == 0 && i <= last)
1765 return (long)last;
1766 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001767 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001768 return (long)j;
1769 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001770
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771 return -1;
1772}
1773
1774
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776"S.find(sub [,start [,end]]) -> int\n\
1777\n\
1778Return the lowest index in S where substring sub is found,\n\
1779such that sub is contained within s[start,end]. Optional\n\
1780arguments start and end are interpreted as in slice notation.\n\
1781\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001782Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783
1784static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001785string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001787 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788 if (result == -2)
1789 return NULL;
1790 return PyInt_FromLong(result);
1791}
1792
1793
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001794PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795"S.index(sub [,start [,end]]) -> int\n\
1796\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001797Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798
1799static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001800string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001802 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 if (result == -2)
1804 return NULL;
1805 if (result == -1) {
1806 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001807 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808 return NULL;
1809 }
1810 return PyInt_FromLong(result);
1811}
1812
1813
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001814PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815"S.rfind(sub [,start [,end]]) -> int\n\
1816\n\
1817Return the highest index in S where substring sub is found,\n\
1818such that sub is contained within s[start,end]. Optional\n\
1819arguments start and end are interpreted as in slice notation.\n\
1820\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001821Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822
1823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001824string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001826 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 if (result == -2)
1828 return NULL;
1829 return PyInt_FromLong(result);
1830}
1831
1832
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001833PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834"S.rindex(sub [,start [,end]]) -> int\n\
1835\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001836Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001837
1838static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001839string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001841 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842 if (result == -2)
1843 return NULL;
1844 if (result == -1) {
1845 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001846 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 return NULL;
1848 }
1849 return PyInt_FromLong(result);
1850}
1851
1852
1853static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001854do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1855{
1856 char *s = PyString_AS_STRING(self);
1857 int len = PyString_GET_SIZE(self);
1858 char *sep = PyString_AS_STRING(sepobj);
1859 int seplen = PyString_GET_SIZE(sepobj);
1860 int i, j;
1861
1862 i = 0;
1863 if (striptype != RIGHTSTRIP) {
1864 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1865 i++;
1866 }
1867 }
1868
1869 j = len;
1870 if (striptype != LEFTSTRIP) {
1871 do {
1872 j--;
1873 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1874 j++;
1875 }
1876
1877 if (i == 0 && j == len && PyString_CheckExact(self)) {
1878 Py_INCREF(self);
1879 return (PyObject*)self;
1880 }
1881 else
1882 return PyString_FromStringAndSize(s+i, j-i);
1883}
1884
1885
1886static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001887do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888{
1889 char *s = PyString_AS_STRING(self);
1890 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 i = 0;
1893 if (striptype != RIGHTSTRIP) {
1894 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1895 i++;
1896 }
1897 }
1898
1899 j = len;
1900 if (striptype != LEFTSTRIP) {
1901 do {
1902 j--;
1903 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1904 j++;
1905 }
1906
Tim Peters8fa5dd02001-09-12 02:18:30 +00001907 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 Py_INCREF(self);
1909 return (PyObject*)self;
1910 }
1911 else
1912 return PyString_FromStringAndSize(s+i, j-i);
1913}
1914
1915
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001916static PyObject *
1917do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1918{
1919 PyObject *sep = NULL;
1920
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001921 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001922 return NULL;
1923
1924 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001925 if (PyString_Check(sep))
1926 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001927#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001928 else if (PyUnicode_Check(sep)) {
1929 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1930 PyObject *res;
1931 if (uniself==NULL)
1932 return NULL;
1933 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1934 striptype, sep);
1935 Py_DECREF(uniself);
1936 return res;
1937 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001938#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001939 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001940 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001941#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001942 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001943#else
1944 "%s arg must be None or str",
1945#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001946 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001947 return NULL;
1948 }
1949 return do_xstrip(self, striptype, sep);
1950 }
1951
1952 return do_strip(self, striptype);
1953}
1954
1955
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001956PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001957"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958\n\
1959Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001960whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001961If chars is given and not None, remove characters in chars instead.\n\
1962If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963
1964static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001965string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001967 if (PyTuple_GET_SIZE(args) == 0)
1968 return do_strip(self, BOTHSTRIP); /* Common case */
1969 else
1970 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971}
1972
1973
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001974PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001975"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001977Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001978If chars is given and not None, remove characters in chars instead.\n\
1979If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980
1981static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001982string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001984 if (PyTuple_GET_SIZE(args) == 0)
1985 return do_strip(self, LEFTSTRIP); /* Common case */
1986 else
1987 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988}
1989
1990
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001991PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001992"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001994Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001995If chars is given and not None, remove characters in chars instead.\n\
1996If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997
1998static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001999string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002001 if (PyTuple_GET_SIZE(args) == 0)
2002 return do_strip(self, RIGHTSTRIP); /* Common case */
2003 else
2004 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005}
2006
2007
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002008PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009"S.lower() -> string\n\
2010\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002011Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012
2013static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002014string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015{
2016 char *s = PyString_AS_STRING(self), *s_new;
2017 int i, n = PyString_GET_SIZE(self);
2018 PyObject *new;
2019
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020 new = PyString_FromStringAndSize(NULL, n);
2021 if (new == NULL)
2022 return NULL;
2023 s_new = PyString_AsString(new);
2024 for (i = 0; i < n; i++) {
2025 int c = Py_CHARMASK(*s++);
2026 if (isupper(c)) {
2027 *s_new = tolower(c);
2028 } else
2029 *s_new = c;
2030 s_new++;
2031 }
2032 return new;
2033}
2034
2035
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002036PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037"S.upper() -> string\n\
2038\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002039Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040
2041static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002042string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043{
2044 char *s = PyString_AS_STRING(self), *s_new;
2045 int i, n = PyString_GET_SIZE(self);
2046 PyObject *new;
2047
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 new = PyString_FromStringAndSize(NULL, n);
2049 if (new == NULL)
2050 return NULL;
2051 s_new = PyString_AsString(new);
2052 for (i = 0; i < n; i++) {
2053 int c = Py_CHARMASK(*s++);
2054 if (islower(c)) {
2055 *s_new = toupper(c);
2056 } else
2057 *s_new = c;
2058 s_new++;
2059 }
2060 return new;
2061}
2062
2063
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002064PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065"S.title() -> string\n\
2066\n\
2067Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002068characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069
2070static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002071string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002072{
2073 char *s = PyString_AS_STRING(self), *s_new;
2074 int i, n = PyString_GET_SIZE(self);
2075 int previous_is_cased = 0;
2076 PyObject *new;
2077
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 new = PyString_FromStringAndSize(NULL, n);
2079 if (new == NULL)
2080 return NULL;
2081 s_new = PyString_AsString(new);
2082 for (i = 0; i < n; i++) {
2083 int c = Py_CHARMASK(*s++);
2084 if (islower(c)) {
2085 if (!previous_is_cased)
2086 c = toupper(c);
2087 previous_is_cased = 1;
2088 } else if (isupper(c)) {
2089 if (previous_is_cased)
2090 c = tolower(c);
2091 previous_is_cased = 1;
2092 } else
2093 previous_is_cased = 0;
2094 *s_new++ = c;
2095 }
2096 return new;
2097}
2098
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002099PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100"S.capitalize() -> string\n\
2101\n\
2102Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002103capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104
2105static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002106string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107{
2108 char *s = PyString_AS_STRING(self), *s_new;
2109 int i, n = PyString_GET_SIZE(self);
2110 PyObject *new;
2111
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112 new = PyString_FromStringAndSize(NULL, n);
2113 if (new == NULL)
2114 return NULL;
2115 s_new = PyString_AsString(new);
2116 if (0 < n) {
2117 int c = Py_CHARMASK(*s++);
2118 if (islower(c))
2119 *s_new = toupper(c);
2120 else
2121 *s_new = c;
2122 s_new++;
2123 }
2124 for (i = 1; i < n; i++) {
2125 int c = Py_CHARMASK(*s++);
2126 if (isupper(c))
2127 *s_new = tolower(c);
2128 else
2129 *s_new = c;
2130 s_new++;
2131 }
2132 return new;
2133}
2134
2135
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002136PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137"S.count(sub[, start[, end]]) -> int\n\
2138\n\
2139Return the number of occurrences of substring sub in string\n\
2140S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002141interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142
2143static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002144string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002146 const char *s = PyString_AS_STRING(self), *sub, *t;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147 int len = PyString_GET_SIZE(self), n;
2148 int i = 0, last = INT_MAX;
2149 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151
Guido van Rossumc6821402000-05-08 14:08:05 +00002152 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2153 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002155
Guido van Rossum4c08d552000-03-10 22:55:18 +00002156 if (PyString_Check(subobj)) {
2157 sub = PyString_AS_STRING(subobj);
2158 n = PyString_GET_SIZE(subobj);
2159 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002160#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002161 else if (PyUnicode_Check(subobj)) {
2162 int count;
2163 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2164 if (count == -1)
2165 return NULL;
2166 else
2167 return PyInt_FromLong((long) count);
2168 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002169#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002170 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2171 return NULL;
2172
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002173 string_adjust_indices(&i, &last, len);
2174
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175 m = last + 1 - n;
2176 if (n == 0)
2177 return PyInt_FromLong((long) (m-i));
2178
2179 r = 0;
2180 while (i < m) {
2181 if (!memcmp(s+i, sub, n)) {
2182 r++;
2183 i += n;
2184 } else {
2185 i++;
2186 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002187 if (i >= m)
2188 break;
2189 t = memchr(s+i, sub[0], m-i);
2190 if (t == NULL)
2191 break;
2192 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 }
2194 return PyInt_FromLong((long) r);
2195}
2196
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002197PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198"S.swapcase() -> string\n\
2199\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002201converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202
2203static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002204string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205{
2206 char *s = PyString_AS_STRING(self), *s_new;
2207 int i, n = PyString_GET_SIZE(self);
2208 PyObject *new;
2209
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210 new = PyString_FromStringAndSize(NULL, n);
2211 if (new == NULL)
2212 return NULL;
2213 s_new = PyString_AsString(new);
2214 for (i = 0; i < n; i++) {
2215 int c = Py_CHARMASK(*s++);
2216 if (islower(c)) {
2217 *s_new = toupper(c);
2218 }
2219 else if (isupper(c)) {
2220 *s_new = tolower(c);
2221 }
2222 else
2223 *s_new = c;
2224 s_new++;
2225 }
2226 return new;
2227}
2228
2229
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002230PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231"S.translate(table [,deletechars]) -> string\n\
2232\n\
2233Return a copy of the string S, where all characters occurring\n\
2234in the optional argument deletechars are removed, and the\n\
2235remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002236translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237
2238static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002239string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241 register char *input, *output;
2242 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243 register int i, c, changed = 0;
2244 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246 int inlen, tablen, dellen = 0;
2247 PyObject *result;
2248 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002251 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002252 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254
2255 if (PyString_Check(tableobj)) {
2256 table1 = PyString_AS_STRING(tableobj);
2257 tablen = PyString_GET_SIZE(tableobj);
2258 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002259#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002260 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002261 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 parameter; instead a mapping to None will cause characters
2263 to be deleted. */
2264 if (delobj != NULL) {
2265 PyErr_SetString(PyExc_TypeError,
2266 "deletions are implemented differently for unicode");
2267 return NULL;
2268 }
2269 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2270 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002271#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002273 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274
Martin v. Löwis00b61272002-12-12 20:03:19 +00002275 if (tablen != 256) {
2276 PyErr_SetString(PyExc_ValueError,
2277 "translation table must be 256 characters long");
2278 return NULL;
2279 }
2280
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 if (delobj != NULL) {
2282 if (PyString_Check(delobj)) {
2283 del_table = PyString_AS_STRING(delobj);
2284 dellen = PyString_GET_SIZE(delobj);
2285 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002286#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 else if (PyUnicode_Check(delobj)) {
2288 PyErr_SetString(PyExc_TypeError,
2289 "deletions are implemented differently for unicode");
2290 return NULL;
2291 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002292#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2294 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 }
2296 else {
2297 del_table = NULL;
2298 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299 }
2300
2301 table = table1;
2302 inlen = PyString_Size(input_obj);
2303 result = PyString_FromStringAndSize((char *)NULL, inlen);
2304 if (result == NULL)
2305 return NULL;
2306 output_start = output = PyString_AsString(result);
2307 input = PyString_AsString(input_obj);
2308
2309 if (dellen == 0) {
2310 /* If no deletions are required, use faster code */
2311 for (i = inlen; --i >= 0; ) {
2312 c = Py_CHARMASK(*input++);
2313 if (Py_CHARMASK((*output++ = table[c])) != c)
2314 changed = 1;
2315 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002316 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317 return result;
2318 Py_DECREF(result);
2319 Py_INCREF(input_obj);
2320 return input_obj;
2321 }
2322
2323 for (i = 0; i < 256; i++)
2324 trans_table[i] = Py_CHARMASK(table[i]);
2325
2326 for (i = 0; i < dellen; i++)
2327 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2328
2329 for (i = inlen; --i >= 0; ) {
2330 c = Py_CHARMASK(*input++);
2331 if (trans_table[c] != -1)
2332 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2333 continue;
2334 changed = 1;
2335 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002336 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 Py_DECREF(result);
2338 Py_INCREF(input_obj);
2339 return input_obj;
2340 }
2341 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002342 if (inlen > 0)
2343 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344 return result;
2345}
2346
2347
2348/* What follows is used for implementing replace(). Perry Stoll. */
2349
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  if there is none.  A pattern longer than MEM is never found, so -1
  is returned in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* a match cannot begin in the last pat_len-1 bytes */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
2375
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning from
   the left and resuming right after each match:
       mem=1111  and pat=11 gives 2;
       mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		const int at = mymemfind(mem, len, pat, pat_len);
		int consumed;

		if (at == -1)
			break;
		/* Continue searching just past the match found. */
		consumed = at + pat_len;
		mem += consumed;
		len -= consumed;
	}
	return hits;
}
2399
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If length of PAT is greater than length of STR or there are no
   occurrences of PAT in STR, then the original string is returned.
   Otherwise, a new string is allocated here and returned.

   COUNT limits the number of replacements; a negative COUNT means
   "no limit".  An empty PAT matches before every byte of STR and once
   more at the end.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
	     const char *pat, int pat_len,	/* pattern string to find */
	     const char *sub, int sub_len,	/* substitution string */
	     int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
		goto return_same;

	/* find length of output string */
	if (pat_len > 0)
		nfound = mymemcnt(str, len, pat, pat_len);
	else
		/* len + 1 insertion points; saturate instead of overflowing
		   (the size check below then rejects the request) */
		nfound = (len < INT_MAX) ? len + 1 : INT_MAX;
	if (count >= 0 && nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Each replacement changes the length by delta.  Growth must be
	   checked before multiplying: an overflowed new_len would lead to
	   an undersized buffer that the copy loops then overrun.  Shrinking
	   (delta <= 0) cannot overflow since it only happens with
	   pat_len > 0, where nfound * pat_len <= len. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;
	new_len = len + nfound * delta;

	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		if (pat_len > 0) {
			for (; nfound > 0; --nfound) {
				/* find index of next instance of pattern */
				offset = mymemfind(str, len, pat, pat_len);
				if (offset == -1)
					break;

				/* copy non matching part of input string */
				memcpy(new_s, str, offset);
				str += offset + pat_len;
				len -= offset + pat_len;

				/* copy substitute into the output string */
				new_s += offset;
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
			}
			/* copy any remaining values into output string */
			if (len > 0)
				memcpy(new_s, str, len);
		}
		else {
			/* empty pattern: emit SUB, then one input byte,
			   until the allowed insertions are used up */
			for (;;++str, --len) {
				memcpy(new_s, sub, sub_len);
				new_s += sub_len;
				if (--nfound <= 0) {
					/* copy the untouched tail */
					memcpy(new_s, str, len);
					break;
				}
				*new_s++ = *str;
			}
		}
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2497
2498
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002499PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002500"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501\n\
2502Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002503old replaced by new. If the optional argument count is\n\
2504given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002505
2506static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002507string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509 const char *str = PyString_AS_STRING(self), *sub, *repl;
2510 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002511 const int len = PyString_GET_SIZE(self);
2512 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002514 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002516
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 if (!PyArg_ParseTuple(args, "OO|i:replace",
2518 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002519 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520
2521 if (PyString_Check(subobj)) {
2522 sub = PyString_AS_STRING(subobj);
2523 sub_len = PyString_GET_SIZE(subobj);
2524 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002525#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002527 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002529#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2531 return NULL;
2532
2533 if (PyString_Check(replobj)) {
2534 repl = PyString_AS_STRING(replobj);
2535 repl_len = PyString_GET_SIZE(replobj);
2536 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002537#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002538 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002539 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002540 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002541#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2543 return NULL;
2544
Guido van Rossum4c08d552000-03-10 22:55:18 +00002545 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002546 if (new_s == NULL) {
2547 PyErr_NoMemory();
2548 return NULL;
2549 }
2550 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002551 if (PyString_CheckExact(self)) {
2552 /* we're returning another reference to self */
2553 new = (PyObject*)self;
2554 Py_INCREF(new);
2555 }
2556 else {
2557 new = PyString_FromStringAndSize(str, len);
2558 if (new == NULL)
2559 return NULL;
2560 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002561 }
2562 else {
2563 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002564 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565 }
2566 return new;
2567}
2568
2569
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002570PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002571"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002572\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002573Return True if S starts with the specified prefix, False otherwise.\n\
2574With optional start, test S beginning at that position.\n\
2575With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002576
2577static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002578string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002579{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002581 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002583 int plen;
2584 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002585 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002587
Guido van Rossumc6821402000-05-08 14:08:05 +00002588 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2589 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 return NULL;
2591 if (PyString_Check(subobj)) {
2592 prefix = PyString_AS_STRING(subobj);
2593 plen = PyString_GET_SIZE(subobj);
2594 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002595#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002596 else if (PyUnicode_Check(subobj)) {
2597 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002598 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002599 subobj, start, end, -1);
2600 if (rc == -1)
2601 return NULL;
2602 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002603 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002604 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002605#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002607 return NULL;
2608
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002609 string_adjust_indices(&start, &end, len);
2610
2611 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002612 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002613
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002614 if (end-start >= plen)
2615 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2616 else
2617 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002618}
2619
2620
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002621PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002622"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002623\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002624Return True if S ends with the specified suffix, False otherwise.\n\
2625With optional start, test S beginning at that position.\n\
2626With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002627
2628static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002629string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002630{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002631 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002632 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633 const char* suffix;
2634 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002635 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002636 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002637 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002638
Guido van Rossumc6821402000-05-08 14:08:05 +00002639 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2640 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641 return NULL;
2642 if (PyString_Check(subobj)) {
2643 suffix = PyString_AS_STRING(subobj);
2644 slen = PyString_GET_SIZE(subobj);
2645 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002646#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002647 else if (PyUnicode_Check(subobj)) {
2648 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002649 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002650 subobj, start, end, +1);
2651 if (rc == -1)
2652 return NULL;
2653 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002654 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002655 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002656#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002657 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658 return NULL;
2659
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002660 string_adjust_indices(&start, &end, len);
2661
2662 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002663 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002664
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002665 if (end-slen > start)
2666 start = end - slen;
2667 if (end-start >= slen)
2668 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2669 else
2670 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002671}
2672
2673
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002674PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002675"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002676\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002677Encodes S using the codec registered for encoding. encoding defaults\n\
2678to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002679handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002680a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2681'xmlcharrefreplace' as well as any other name registered with\n\
2682codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002683
2684static PyObject *
2685string_encode(PyStringObject *self, PyObject *args)
2686{
2687 char *encoding = NULL;
2688 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002689 PyObject *v;
2690
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002691 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2692 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002693 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002694 if (v == NULL)
2695 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002696 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2697 PyErr_Format(PyExc_TypeError,
2698 "encoder did not return a string/unicode object "
2699 "(type=%.400s)",
2700 v->ob_type->tp_name);
2701 Py_DECREF(v);
2702 return NULL;
2703 }
2704 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002705
2706 onError:
2707 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002708}
2709
2710
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002711PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002712"S.decode([encoding[,errors]]) -> object\n\
2713\n\
2714Decodes S using the codec registered for encoding. encoding defaults\n\
2715to the default encoding. errors may be given to set a different error\n\
2716handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002717a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2718as well as any other name registerd with codecs.register_error that is\n\
2719able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002720
2721static PyObject *
2722string_decode(PyStringObject *self, PyObject *args)
2723{
2724 char *encoding = NULL;
2725 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002726 PyObject *v;
2727
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002728 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2729 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002730 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002731 if (v == NULL)
2732 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002733 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2734 PyErr_Format(PyExc_TypeError,
2735 "decoder did not return a string/unicode object "
2736 "(type=%.400s)",
2737 v->ob_type->tp_name);
2738 Py_DECREF(v);
2739 return NULL;
2740 }
2741 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002742
2743 onError:
2744 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002745}
2746
2747
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002748PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002749"S.expandtabs([tabsize]) -> string\n\
2750\n\
2751Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002752If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002753
2754static PyObject*
2755string_expandtabs(PyStringObject *self, PyObject *args)
2756{
2757 const char *e, *p;
2758 char *q;
2759 int i, j;
2760 PyObject *u;
2761 int tabsize = 8;
2762
2763 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2764 return NULL;
2765
Thomas Wouters7e474022000-07-16 12:04:32 +00002766 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002767 i = j = 0;
2768 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2769 for (p = PyString_AS_STRING(self); p < e; p++)
2770 if (*p == '\t') {
2771 if (tabsize > 0)
2772 j += tabsize - (j % tabsize);
2773 }
2774 else {
2775 j++;
2776 if (*p == '\n' || *p == '\r') {
2777 i += j;
2778 j = 0;
2779 }
2780 }
2781
2782 /* Second pass: create output string and fill it */
2783 u = PyString_FromStringAndSize(NULL, i + j);
2784 if (!u)
2785 return NULL;
2786
2787 j = 0;
2788 q = PyString_AS_STRING(u);
2789
2790 for (p = PyString_AS_STRING(self); p < e; p++)
2791 if (*p == '\t') {
2792 if (tabsize > 0) {
2793 i = tabsize - (j % tabsize);
2794 j += i;
2795 while (i--)
2796 *q++ = ' ';
2797 }
2798 }
2799 else {
2800 j++;
2801 *q++ = *p;
2802 if (*p == '\n' || *p == '\r')
2803 j = 0;
2804 }
2805
2806 return u;
2807}
2808
Tim Peters8fa5dd02001-09-12 02:18:30 +00002809static PyObject *
2810pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002811{
2812 PyObject *u;
2813
2814 if (left < 0)
2815 left = 0;
2816 if (right < 0)
2817 right = 0;
2818
Tim Peters8fa5dd02001-09-12 02:18:30 +00002819 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002820 Py_INCREF(self);
2821 return (PyObject *)self;
2822 }
2823
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002824 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002825 left + PyString_GET_SIZE(self) + right);
2826 if (u) {
2827 if (left)
2828 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002829 memcpy(PyString_AS_STRING(u) + left,
2830 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002831 PyString_GET_SIZE(self));
2832 if (right)
2833 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2834 fill, right);
2835 }
2836
2837 return u;
2838}
2839
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002840PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002841"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002842"\n"
2843"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002844"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845
2846static PyObject *
2847string_ljust(PyStringObject *self, PyObject *args)
2848{
2849 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002850 char fillchar = ' ';
2851
2852 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002853 return NULL;
2854
Tim Peters8fa5dd02001-09-12 02:18:30 +00002855 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002856 Py_INCREF(self);
2857 return (PyObject*) self;
2858 }
2859
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002860 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861}
2862
2863
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002864PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002865"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002866"\n"
2867"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002868"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869
2870static PyObject *
2871string_rjust(PyStringObject *self, PyObject *args)
2872{
2873 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002874 char fillchar = ' ';
2875
2876 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877 return NULL;
2878
Tim Peters8fa5dd02001-09-12 02:18:30 +00002879 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002880 Py_INCREF(self);
2881 return (PyObject*) self;
2882 }
2883
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002884 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885}
2886
2887
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002888PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002889"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002890"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002891"Return S centered in a string of length width. Padding is\n"
2892"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893
2894static PyObject *
2895string_center(PyStringObject *self, PyObject *args)
2896{
2897 int marg, left;
2898 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002899 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002901 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002902 return NULL;
2903
Tim Peters8fa5dd02001-09-12 02:18:30 +00002904 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002905 Py_INCREF(self);
2906 return (PyObject*) self;
2907 }
2908
2909 marg = width - PyString_GET_SIZE(self);
2910 left = marg / 2 + (marg & width & 1);
2911
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002912 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002913}
2914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002915PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002916"S.zfill(width) -> string\n"
2917"\n"
2918"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002919"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002920
2921static PyObject *
2922string_zfill(PyStringObject *self, PyObject *args)
2923{
2924 int fill;
2925 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002926 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002927
2928 int width;
2929 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2930 return NULL;
2931
2932 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002933 if (PyString_CheckExact(self)) {
2934 Py_INCREF(self);
2935 return (PyObject*) self;
2936 }
2937 else
2938 return PyString_FromStringAndSize(
2939 PyString_AS_STRING(self),
2940 PyString_GET_SIZE(self)
2941 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002942 }
2943
2944 fill = width - PyString_GET_SIZE(self);
2945
2946 s = pad(self, fill, 0, '0');
2947
2948 if (s == NULL)
2949 return NULL;
2950
2951 p = PyString_AS_STRING(s);
2952 if (p[fill] == '+' || p[fill] == '-') {
2953 /* move sign to beginning of string */
2954 p[0] = p[fill];
2955 p[fill] = '0';
2956 }
2957
2958 return (PyObject*) s;
2959}
2960
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002961PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002962"S.isspace() -> bool\n\
2963\n\
2964Return True if all characters in S are whitespace\n\
2965and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002966
2967static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002968string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002969{
Fred Drakeba096332000-07-09 07:04:36 +00002970 register const unsigned char *p
2971 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002972 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002973
Guido van Rossum4c08d552000-03-10 22:55:18 +00002974 /* Shortcut for single character strings */
2975 if (PyString_GET_SIZE(self) == 1 &&
2976 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002977 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002978
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002979 /* Special case for empty strings */
2980 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002981 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002982
Guido van Rossum4c08d552000-03-10 22:55:18 +00002983 e = p + PyString_GET_SIZE(self);
2984 for (; p < e; p++) {
2985 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002986 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002988 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002989}
2990
2991
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002992PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002993"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002994\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002995Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002996and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002997
2998static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002999string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003000{
Fred Drakeba096332000-07-09 07:04:36 +00003001 register const unsigned char *p
3002 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003003 register const unsigned char *e;
3004
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003005 /* Shortcut for single character strings */
3006 if (PyString_GET_SIZE(self) == 1 &&
3007 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003008 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003009
3010 /* Special case for empty strings */
3011 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003012 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003013
3014 e = p + PyString_GET_SIZE(self);
3015 for (; p < e; p++) {
3016 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003017 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003018 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003019 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003020}
3021
3022
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003023PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003024"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003025\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003026Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003027and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003028
3029static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003030string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003031{
Fred Drakeba096332000-07-09 07:04:36 +00003032 register const unsigned char *p
3033 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003034 register const unsigned char *e;
3035
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003036 /* Shortcut for single character strings */
3037 if (PyString_GET_SIZE(self) == 1 &&
3038 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003039 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003040
3041 /* Special case for empty strings */
3042 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003043 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003044
3045 e = p + PyString_GET_SIZE(self);
3046 for (; p < e; p++) {
3047 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003048 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003049 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003050 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003051}
3052
3053
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003054PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003055"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003056\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003057Return True if all characters in S are digits\n\
3058and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059
3060static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003061string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003062{
Fred Drakeba096332000-07-09 07:04:36 +00003063 register const unsigned char *p
3064 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003065 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003066
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 /* Shortcut for single character strings */
3068 if (PyString_GET_SIZE(self) == 1 &&
3069 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003070 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003072 /* Special case for empty strings */
3073 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003074 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003075
Guido van Rossum4c08d552000-03-10 22:55:18 +00003076 e = p + PyString_GET_SIZE(self);
3077 for (; p < e; p++) {
3078 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003079 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003081 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082}
3083
3084
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003085PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003086"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003087\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003088Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003089at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090
3091static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003092string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003093{
Fred Drakeba096332000-07-09 07:04:36 +00003094 register const unsigned char *p
3095 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003096 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003097 int cased;
3098
Guido van Rossum4c08d552000-03-10 22:55:18 +00003099 /* Shortcut for single character strings */
3100 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003101 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003102
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003103 /* Special case for empty strings */
3104 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003105 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003106
Guido van Rossum4c08d552000-03-10 22:55:18 +00003107 e = p + PyString_GET_SIZE(self);
3108 cased = 0;
3109 for (; p < e; p++) {
3110 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003111 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003112 else if (!cased && islower(*p))
3113 cased = 1;
3114 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003115 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003116}
3117
3118
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003119PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003120"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003121\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003122Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003123at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003124
3125static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003126string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003127{
Fred Drakeba096332000-07-09 07:04:36 +00003128 register const unsigned char *p
3129 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003130 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003131 int cased;
3132
Guido van Rossum4c08d552000-03-10 22:55:18 +00003133 /* Shortcut for single character strings */
3134 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003135 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003136
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003137 /* Special case for empty strings */
3138 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003139 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003140
Guido van Rossum4c08d552000-03-10 22:55:18 +00003141 e = p + PyString_GET_SIZE(self);
3142 cased = 0;
3143 for (; p < e; p++) {
3144 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003145 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003146 else if (!cased && isupper(*p))
3147 cased = 1;
3148 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003149 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150}
3151
3152
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003153PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003154"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003156Return True if S is a titlecased string and there is at least one\n\
3157character in S, i.e. uppercase characters may only follow uncased\n\
3158characters and lowercase characters only cased ones. Return False\n\
3159otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160
3161static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003162string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003163{
Fred Drakeba096332000-07-09 07:04:36 +00003164 register const unsigned char *p
3165 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003166 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167 int cased, previous_is_cased;
3168
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 /* Shortcut for single character strings */
3170 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003171 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003172
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003173 /* Special case for empty strings */
3174 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003175 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003176
Guido van Rossum4c08d552000-03-10 22:55:18 +00003177 e = p + PyString_GET_SIZE(self);
3178 cased = 0;
3179 previous_is_cased = 0;
3180 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003181 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182
3183 if (isupper(ch)) {
3184 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003185 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186 previous_is_cased = 1;
3187 cased = 1;
3188 }
3189 else if (islower(ch)) {
3190 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003191 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003192 previous_is_cased = 1;
3193 cased = 1;
3194 }
3195 else
3196 previous_is_cased = 0;
3197 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003198 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003199}
3200
3201
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003202PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003203"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003204\n\
3205Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003206Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003207is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208
Guido van Rossum4c08d552000-03-10 22:55:18 +00003209static PyObject*
3210string_splitlines(PyStringObject *self, PyObject *args)
3211{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003212 register int i;
3213 register int j;
3214 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003215 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 PyObject *list;
3217 PyObject *str;
3218 char *data;
3219
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003220 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003221 return NULL;
3222
3223 data = PyString_AS_STRING(self);
3224 len = PyString_GET_SIZE(self);
3225
Guido van Rossum4c08d552000-03-10 22:55:18 +00003226 list = PyList_New(0);
3227 if (!list)
3228 goto onError;
3229
3230 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003231 int eol;
3232
Guido van Rossum4c08d552000-03-10 22:55:18 +00003233 /* Find a line and append it */
3234 while (i < len && data[i] != '\n' && data[i] != '\r')
3235 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003236
3237 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003238 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003239 if (i < len) {
3240 if (data[i] == '\r' && i + 1 < len &&
3241 data[i+1] == '\n')
3242 i += 2;
3243 else
3244 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003245 if (keepends)
3246 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003247 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003248 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003249 j = i;
3250 }
3251 if (j < len) {
3252 SPLIT_APPEND(data, j, len);
3253 }
3254
3255 return list;
3256
3257 onError:
3258 Py_DECREF(list);
3259 return NULL;
3260}
3261
3262#undef SPLIT_APPEND
3263
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003264static PyObject *
3265string_getnewargs(PyStringObject *v)
3266{
3267 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3268}
3269
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003270
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003271static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003272string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003273 /* Counterparts of the obsolete stropmodule functions; except
3274 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003275 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3276 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003277 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003278 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3279 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003280 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3281 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3282 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3283 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3284 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3285 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3286 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003287 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3288 capitalize__doc__},
3289 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3290 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3291 endswith__doc__},
3292 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3293 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3294 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3295 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3296 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3297 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3298 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3299 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3300 startswith__doc__},
3301 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3302 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3303 swapcase__doc__},
3304 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3305 translate__doc__},
3306 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3307 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3308 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3309 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3310 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3311 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3312 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3313 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3314 expandtabs__doc__},
3315 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3316 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003317 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003318 {NULL, NULL} /* sentinel */
3319};
3320
Jeremy Hylton938ace62002-07-17 16:30:39 +00003321static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003322str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3323
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003324static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003325string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003326{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003327 PyObject *x = NULL;
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00003328 static const char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003329
Guido van Rossumae960af2001-08-30 03:11:59 +00003330 if (type != &PyString_Type)
3331 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003332 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3333 return NULL;
3334 if (x == NULL)
3335 return PyString_FromString("");
3336 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003337}
3338
Guido van Rossumae960af2001-08-30 03:11:59 +00003339static PyObject *
3340str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3341{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003342 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003343 int n;
3344
3345 assert(PyType_IsSubtype(type, &PyString_Type));
3346 tmp = string_new(&PyString_Type, args, kwds);
3347 if (tmp == NULL)
3348 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003349 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003350 n = PyString_GET_SIZE(tmp);
3351 pnew = type->tp_alloc(type, n);
3352 if (pnew != NULL) {
3353 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003354 ((PyStringObject *)pnew)->ob_shash =
3355 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003356 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003357 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003358 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003359 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003360}
3361
Guido van Rossumcacfc072002-05-24 19:01:59 +00003362static PyObject *
3363basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3364{
3365 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003366 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003367 return NULL;
3368}
3369
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003370static PyObject *
3371string_mod(PyObject *v, PyObject *w)
3372{
3373 if (!PyString_Check(v)) {
3374 Py_INCREF(Py_NotImplemented);
3375 return Py_NotImplemented;
3376 }
3377 return PyString_Format(v, w);
3378}
3379
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003380PyDoc_STRVAR(basestring_doc,
3381"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003382
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003383static PyNumberMethods string_as_number = {
3384 0, /*nb_add*/
3385 0, /*nb_subtract*/
3386 0, /*nb_multiply*/
3387 0, /*nb_divide*/
3388 string_mod, /*nb_remainder*/
3389};
3390
3391
Guido van Rossumcacfc072002-05-24 19:01:59 +00003392PyTypeObject PyBaseString_Type = {
3393 PyObject_HEAD_INIT(&PyType_Type)
3394 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003395 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003396 0,
3397 0,
3398 0, /* tp_dealloc */
3399 0, /* tp_print */
3400 0, /* tp_getattr */
3401 0, /* tp_setattr */
3402 0, /* tp_compare */
3403 0, /* tp_repr */
3404 0, /* tp_as_number */
3405 0, /* tp_as_sequence */
3406 0, /* tp_as_mapping */
3407 0, /* tp_hash */
3408 0, /* tp_call */
3409 0, /* tp_str */
3410 0, /* tp_getattro */
3411 0, /* tp_setattro */
3412 0, /* tp_as_buffer */
3413 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3414 basestring_doc, /* tp_doc */
3415 0, /* tp_traverse */
3416 0, /* tp_clear */
3417 0, /* tp_richcompare */
3418 0, /* tp_weaklistoffset */
3419 0, /* tp_iter */
3420 0, /* tp_iternext */
3421 0, /* tp_methods */
3422 0, /* tp_members */
3423 0, /* tp_getset */
3424 &PyBaseObject_Type, /* tp_base */
3425 0, /* tp_dict */
3426 0, /* tp_descr_get */
3427 0, /* tp_descr_set */
3428 0, /* tp_dictoffset */
3429 0, /* tp_init */
3430 0, /* tp_alloc */
3431 basestring_new, /* tp_new */
3432 0, /* tp_free */
3433};
3434
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003435PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003436"str(object) -> string\n\
3437\n\
3438Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003439If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003440
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003441PyTypeObject PyString_Type = {
3442 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003443 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003444 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003445 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003446 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003447 (destructor)string_dealloc, /* tp_dealloc */
3448 (printfunc)string_print, /* tp_print */
3449 0, /* tp_getattr */
3450 0, /* tp_setattr */
3451 0, /* tp_compare */
3452 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003453 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003454 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003455 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003456 (hashfunc)string_hash, /* tp_hash */
3457 0, /* tp_call */
3458 (reprfunc)string_str, /* tp_str */
3459 PyObject_GenericGetAttr, /* tp_getattro */
3460 0, /* tp_setattro */
3461 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003462 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3463 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003464 string_doc, /* tp_doc */
3465 0, /* tp_traverse */
3466 0, /* tp_clear */
3467 (richcmpfunc)string_richcompare, /* tp_richcompare */
3468 0, /* tp_weaklistoffset */
3469 0, /* tp_iter */
3470 0, /* tp_iternext */
3471 string_methods, /* tp_methods */
3472 0, /* tp_members */
3473 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003474 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003475 0, /* tp_dict */
3476 0, /* tp_descr_get */
3477 0, /* tp_descr_set */
3478 0, /* tp_dictoffset */
3479 0, /* tp_init */
3480 0, /* tp_alloc */
3481 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003482 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003483};
3484
3485void
Fred Drakeba096332000-07-09 07:04:36 +00003486PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003487{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003488 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003489 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003490 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003491 if (w == NULL || !PyString_Check(*pv)) {
3492 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003493 *pv = NULL;
3494 return;
3495 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003496 v = string_concat((PyStringObject *) *pv, w);
3497 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003498 *pv = v;
3499}
3500
Guido van Rossum013142a1994-08-30 08:19:36 +00003501void
Fred Drakeba096332000-07-09 07:04:36 +00003502PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003503{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003504 PyString_Concat(pv, w);
3505 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003506}
3507
3508
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003509/* The following function breaks the notion that strings are immutable:
3510 it changes the size of a string. We get away with this only if there
3511 is only one module referencing the object. You can also think of it
3512 as creating a new string object and destroying the old one, only
3513 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003514 already be known to some other part of the code...
3515 Note that if there's not enough memory to resize the string, the original
3516 string object at *pv is deallocated, *pv is set to NULL, an "out of
3517 memory" exception is set, and -1 is returned. Else (on success) 0 is
3518 returned, and the value in *pv may or may not be the same as on input.
3519 As always, an extra byte is allocated for a trailing \0 byte (newsize
3520 does *not* include that), and a trailing \0 byte is stored.
3521*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003522
3523int
Fred Drakeba096332000-07-09 07:04:36 +00003524_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003525{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003526 register PyObject *v;
3527 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003528 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003529 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3530 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003531 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003532 Py_DECREF(v);
3533 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003534 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003535 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003536 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003537 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003538 _Py_ForgetReference(v);
3539 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003540 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003541 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003542 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003543 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003544 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003545 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003546 _Py_NewReference(*pv);
3547 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003548 sv->ob_size = newsize;
3549 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003550 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003551 return 0;
3552}
Guido van Rossume5372401993-03-16 12:15:04 +00003553
3554/* Helpers for formatstring */
3555
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003556static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003557getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003558{
3559 int argidx = *p_argidx;
3560 if (argidx < arglen) {
3561 (*p_argidx)++;
3562 if (arglen < 0)
3563 return args;
3564 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003565 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003566 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003567 PyErr_SetString(PyExc_TypeError,
3568 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003569 return NULL;
3570}
3571
/* Format codes: one bit per flag, each annotated with the format
 * character it stands for.  The bits are distinct so that several flags
 * can be combined in a single int.
 */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
3584
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003585static int
Fred Drakeba096332000-07-09 07:04:36 +00003586formatfloat(char *buf, size_t buflen, int flags,
3587 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003588{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003589 /* fmt = '%#.' + `prec` + `type`
3590 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003591 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003592 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003593 x = PyFloat_AsDouble(v);
3594 if (x == -1.0 && PyErr_Occurred()) {
3595 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003596 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003597 }
Guido van Rossume5372401993-03-16 12:15:04 +00003598 if (prec < 0)
3599 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003600 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3601 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003602 /* Worst case length calc to ensure no buffer overrun:
3603
3604 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003605 fmt = %#.<prec>g
3606 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003607 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003608 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003609
3610 'f' formats:
3611 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3612 len = 1 + 50 + 1 + prec = 52 + prec
3613
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003614 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003615 always given), therefore increase the length by one.
3616
3617 */
3618 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3619 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003620 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003621 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003622 return -1;
3623 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003624 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3625 (flags&F_ALT) ? "#" : "",
3626 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003627 PyOS_ascii_formatd(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003628 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003629}
3630
Tim Peters38fd5b62000-09-21 05:43:11 +00003631/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3632 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3633 * Python's regular ints.
3634 * Return value: a new PyString*, or NULL if error.
3635 * . *pbuf is set to point into it,
3636 * *plen set to the # of chars following that.
3637 * Caller must decref it when done using pbuf.
3638 * The string starting at *pbuf is of the form
3639 * "-"? ("0x" | "0X")? digit+
3640 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003641 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003642 * There will be at least prec digits, zero-filled on the left if
3643 * necessary to get that many.
3644 * val object to be converted
3645 * flags bitmask of format flags; only F_ALT is looked at
3646 * prec minimum number of digits; 0-fill on left if needed
3647 * type a character in [duoxX]; u acts the same as d
3648 *
3649 * CAUTION: o, x and X conversions on regular ints can never
3650 * produce a '-' sign, but can for Python's unbounded ints.
3651 */
3652PyObject*
3653_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3654 char **pbuf, int *plen)
3655{
3656 PyObject *result = NULL;
3657 char *buf;
3658 int i;
3659 int sign; /* 1 if '-', else 0 */
3660 int len; /* number of characters */
3661 int numdigits; /* len == numnondigits + numdigits */
3662 int numnondigits = 0;
3663
3664 switch (type) {
3665 case 'd':
3666 case 'u':
3667 result = val->ob_type->tp_str(val);
3668 break;
3669 case 'o':
3670 result = val->ob_type->tp_as_number->nb_oct(val);
3671 break;
3672 case 'x':
3673 case 'X':
3674 numnondigits = 2;
3675 result = val->ob_type->tp_as_number->nb_hex(val);
3676 break;
3677 default:
3678 assert(!"'type' not in [duoxX]");
3679 }
3680 if (!result)
3681 return NULL;
3682
3683 /* To modify the string in-place, there can only be one reference. */
3684 if (result->ob_refcnt != 1) {
3685 PyErr_BadInternalCall();
3686 return NULL;
3687 }
3688 buf = PyString_AsString(result);
3689 len = PyString_Size(result);
3690 if (buf[len-1] == 'L') {
3691 --len;
3692 buf[len] = '\0';
3693 }
3694 sign = buf[0] == '-';
3695 numnondigits += sign;
3696 numdigits = len - numnondigits;
3697 assert(numdigits > 0);
3698
Tim Petersfff53252001-04-12 18:38:48 +00003699 /* Get rid of base marker unless F_ALT */
3700 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003701 /* Need to skip 0x, 0X or 0. */
3702 int skipped = 0;
3703 switch (type) {
3704 case 'o':
3705 assert(buf[sign] == '0');
3706 /* If 0 is only digit, leave it alone. */
3707 if (numdigits > 1) {
3708 skipped = 1;
3709 --numdigits;
3710 }
3711 break;
3712 case 'x':
3713 case 'X':
3714 assert(buf[sign] == '0');
3715 assert(buf[sign + 1] == 'x');
3716 skipped = 2;
3717 numnondigits -= 2;
3718 break;
3719 }
3720 if (skipped) {
3721 buf += skipped;
3722 len -= skipped;
3723 if (sign)
3724 buf[0] = '-';
3725 }
3726 assert(len == numnondigits + numdigits);
3727 assert(numdigits > 0);
3728 }
3729
3730 /* Fill with leading zeroes to meet minimum width. */
3731 if (prec > numdigits) {
3732 PyObject *r1 = PyString_FromStringAndSize(NULL,
3733 numnondigits + prec);
3734 char *b1;
3735 if (!r1) {
3736 Py_DECREF(result);
3737 return NULL;
3738 }
3739 b1 = PyString_AS_STRING(r1);
3740 for (i = 0; i < numnondigits; ++i)
3741 *b1++ = *buf++;
3742 for (i = 0; i < prec - numdigits; i++)
3743 *b1++ = '0';
3744 for (i = 0; i < numdigits; i++)
3745 *b1++ = *buf++;
3746 *b1 = '\0';
3747 Py_DECREF(result);
3748 result = r1;
3749 buf = PyString_AS_STRING(result);
3750 len = numnondigits + prec;
3751 }
3752
3753 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003754 if (type == 'X') {
3755 /* Need to convert all lower case letters to upper case.
3756 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003757 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003758 if (buf[i] >= 'a' && buf[i] <= 'x')
3759 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003760 }
3761 *pbuf = buf;
3762 *plen = len;
3763 return result;
3764}
3765
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003766static int
Fred Drakeba096332000-07-09 07:04:36 +00003767formatint(char *buf, size_t buflen, int flags,
3768 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003769{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003770 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003771 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3772 + 1 + 1 = 24 */
3773 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003774 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003775 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003776
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003777 x = PyInt_AsLong(v);
3778 if (x == -1 && PyErr_Occurred()) {
3779 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003780 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003781 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003782 if (x < 0 && type == 'u') {
3783 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003784 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003785 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3786 sign = "-";
3787 else
3788 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003789 if (prec < 0)
3790 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003791
3792 if ((flags & F_ALT) &&
3793 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003794 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003795 * of issues that cause pain:
3796 * - when 0 is being converted, the C standard leaves off
3797 * the '0x' or '0X', which is inconsistent with other
3798 * %#x/%#X conversions and inconsistent with Python's
3799 * hex() function
3800 * - there are platforms that violate the standard and
3801 * convert 0 with the '0x' or '0X'
3802 * (Metrowerks, Compaq Tru64)
3803 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003804 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003805 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003806 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003807 * We can achieve the desired consistency by inserting our
3808 * own '0x' or '0X' prefix, and substituting %x/%X in place
3809 * of %#x/%#X.
3810 *
3811 * Note that this is the same approach as used in
3812 * formatint() in unicodeobject.c
3813 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003814 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3815 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003816 }
3817 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003818 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3819 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003820 prec, type);
3821 }
3822
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003823 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3824 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003825 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003826 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003827 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003828 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003829 return -1;
3830 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003831 if (sign[0])
3832 PyOS_snprintf(buf, buflen, fmt, -x);
3833 else
3834 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003835 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003836}
3837
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003838static int
Fred Drakeba096332000-07-09 07:04:36 +00003839formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003840{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003841 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003842 if (PyString_Check(v)) {
3843 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003844 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003845 }
3846 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003847 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003848 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003849 }
3850 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003851 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003852}
3853
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003854/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3855
3856 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3857 chars are formatted. XXX This is a magic number. Each formatting
3858 routine does bounds checking to ensure no overflow, but a better
3859 solution may be to malloc a buffer of appropriate size for each
3860 format. For now, the current solution is sufficient.
3861*/
3862#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003863
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003864PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003865PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003866{
3867 char *fmt, *res;
3868 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003869 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003870 PyObject *result, *orig_args;
3871#ifdef Py_USING_UNICODE
3872 PyObject *v, *w;
3873#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003874 PyObject *dict = NULL;
3875 if (format == NULL || !PyString_Check(format) || args == NULL) {
3876 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003877 return NULL;
3878 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003879 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003880 fmt = PyString_AS_STRING(format);
3881 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003882 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003883 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003884 if (result == NULL)
3885 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003886 res = PyString_AsString(result);
3887 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003888 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003889 argidx = 0;
3890 }
3891 else {
3892 arglen = -1;
3893 argidx = -2;
3894 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003895 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3896 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003897 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003898 while (--fmtcnt >= 0) {
3899 if (*fmt != '%') {
3900 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003901 rescnt = fmtcnt + 100;
3902 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003903 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003904 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003905 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003906 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003907 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003908 }
3909 *res++ = *fmt++;
3910 }
3911 else {
3912 /* Got a format specifier */
3913 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003914 int width = -1;
3915 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003916 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003917 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003918 PyObject *v = NULL;
3919 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003920 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003921 int sign;
3922 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003923 char formatbuf[FORMATBUFLEN];
3924 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003925#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003926 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003927 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003928#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003929
Guido van Rossumda9c2711996-12-05 21:58:58 +00003930 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003931 if (*fmt == '(') {
3932 char *keystart;
3933 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003934 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003935 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003936
3937 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003938 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003939 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003940 goto error;
3941 }
3942 ++fmt;
3943 --fmtcnt;
3944 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003945 /* Skip over balanced parentheses */
3946 while (pcount > 0 && --fmtcnt >= 0) {
3947 if (*fmt == ')')
3948 --pcount;
3949 else if (*fmt == '(')
3950 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003951 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003952 }
3953 keylen = fmt - keystart - 1;
3954 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003955 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003956 "incomplete format key");
3957 goto error;
3958 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003959 key = PyString_FromStringAndSize(keystart,
3960 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003961 if (key == NULL)
3962 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003963 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003964 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003965 args_owned = 0;
3966 }
3967 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003968 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003969 if (args == NULL) {
3970 goto error;
3971 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003972 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003973 arglen = -1;
3974 argidx = -2;
3975 }
Guido van Rossume5372401993-03-16 12:15:04 +00003976 while (--fmtcnt >= 0) {
3977 switch (c = *fmt++) {
3978 case '-': flags |= F_LJUST; continue;
3979 case '+': flags |= F_SIGN; continue;
3980 case ' ': flags |= F_BLANK; continue;
3981 case '#': flags |= F_ALT; continue;
3982 case '0': flags |= F_ZERO; continue;
3983 }
3984 break;
3985 }
3986 if (c == '*') {
3987 v = getnextarg(args, arglen, &argidx);
3988 if (v == NULL)
3989 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003990 if (!PyInt_Check(v)) {
3991 PyErr_SetString(PyExc_TypeError,
3992 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003993 goto error;
3994 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003995 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003996 if (width < 0) {
3997 flags |= F_LJUST;
3998 width = -width;
3999 }
Guido van Rossume5372401993-03-16 12:15:04 +00004000 if (--fmtcnt >= 0)
4001 c = *fmt++;
4002 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004003 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004004 width = c - '0';
4005 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004006 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004007 if (!isdigit(c))
4008 break;
4009 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004010 PyErr_SetString(
4011 PyExc_ValueError,
4012 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004013 goto error;
4014 }
4015 width = width*10 + (c - '0');
4016 }
4017 }
4018 if (c == '.') {
4019 prec = 0;
4020 if (--fmtcnt >= 0)
4021 c = *fmt++;
4022 if (c == '*') {
4023 v = getnextarg(args, arglen, &argidx);
4024 if (v == NULL)
4025 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004026 if (!PyInt_Check(v)) {
4027 PyErr_SetString(
4028 PyExc_TypeError,
4029 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004030 goto error;
4031 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004032 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004033 if (prec < 0)
4034 prec = 0;
4035 if (--fmtcnt >= 0)
4036 c = *fmt++;
4037 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004038 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004039 prec = c - '0';
4040 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004041 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004042 if (!isdigit(c))
4043 break;
4044 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004045 PyErr_SetString(
4046 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004047 "prec too big");
4048 goto error;
4049 }
4050 prec = prec*10 + (c - '0');
4051 }
4052 }
4053 } /* prec */
4054 if (fmtcnt >= 0) {
4055 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004056 if (--fmtcnt >= 0)
4057 c = *fmt++;
4058 }
4059 }
4060 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004061 PyErr_SetString(PyExc_ValueError,
4062 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004063 goto error;
4064 }
4065 if (c != '%') {
4066 v = getnextarg(args, arglen, &argidx);
4067 if (v == NULL)
4068 goto error;
4069 }
4070 sign = 0;
4071 fill = ' ';
4072 switch (c) {
4073 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004074 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004075 len = 1;
4076 break;
4077 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004078#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004079 if (PyUnicode_Check(v)) {
4080 fmt = fmt_start;
4081 argidx = argidx_start;
4082 goto unicode;
4083 }
Georg Brandld45014b2005-10-01 17:06:00 +00004084#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004085 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004086#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004087 if (temp != NULL && PyUnicode_Check(temp)) {
4088 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004089 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004090 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004091 goto unicode;
4092 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004093#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004094 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004095 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004096 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004097 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004098 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004099 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004100 if (!PyString_Check(temp)) {
4101 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004102 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004103 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004104 goto error;
4105 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004106 pbuf = PyString_AS_STRING(temp);
4107 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004108 if (prec >= 0 && len > prec)
4109 len = prec;
4110 break;
4111 case 'i':
4112 case 'd':
4113 case 'u':
4114 case 'o':
4115 case 'x':
4116 case 'X':
4117 if (c == 'i')
4118 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004119 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004120 temp = _PyString_FormatLong(v, flags,
4121 prec, c, &pbuf, &len);
4122 if (!temp)
4123 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004124 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004125 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004126 else {
4127 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004128 len = formatint(pbuf,
4129 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004130 flags, prec, c, v);
4131 if (len < 0)
4132 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004133 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004134 }
4135 if (flags & F_ZERO)
4136 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004137 break;
4138 case 'e':
4139 case 'E':
4140 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004141 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004142 case 'g':
4143 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004144 if (c == 'F')
4145 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004146 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004147 len = formatfloat(pbuf, sizeof(formatbuf),
4148 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004149 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004150 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004151 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004152 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004153 fill = '0';
4154 break;
4155 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004156#ifdef Py_USING_UNICODE
4157 if (PyUnicode_Check(v)) {
4158 fmt = fmt_start;
4159 argidx = argidx_start;
4160 goto unicode;
4161 }
4162#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004163 pbuf = formatbuf;
4164 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004165 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004166 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004167 break;
4168 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004169 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004170 "unsupported format character '%c' (0x%x) "
4171 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004172 c, c,
4173 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004174 goto error;
4175 }
4176 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004177 if (*pbuf == '-' || *pbuf == '+') {
4178 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004179 len--;
4180 }
4181 else if (flags & F_SIGN)
4182 sign = '+';
4183 else if (flags & F_BLANK)
4184 sign = ' ';
4185 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004186 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004187 }
4188 if (width < len)
4189 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004190 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004191 reslen -= rescnt;
4192 rescnt = width + fmtcnt + 100;
4193 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004194 if (reslen < 0) {
4195 Py_DECREF(result);
4196 return PyErr_NoMemory();
4197 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004198 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004199 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004200 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004201 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004202 }
4203 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004204 if (fill != ' ')
4205 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004206 rescnt--;
4207 if (width > len)
4208 width--;
4209 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004210 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4211 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004212 assert(pbuf[1] == c);
4213 if (fill != ' ') {
4214 *res++ = *pbuf++;
4215 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004216 }
Tim Petersfff53252001-04-12 18:38:48 +00004217 rescnt -= 2;
4218 width -= 2;
4219 if (width < 0)
4220 width = 0;
4221 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004222 }
4223 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004224 do {
4225 --rescnt;
4226 *res++ = fill;
4227 } while (--width > len);
4228 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004229 if (fill == ' ') {
4230 if (sign)
4231 *res++ = sign;
4232 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004233 (c == 'x' || c == 'X')) {
4234 assert(pbuf[0] == '0');
4235 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004236 *res++ = *pbuf++;
4237 *res++ = *pbuf++;
4238 }
4239 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004240 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004241 res += len;
4242 rescnt -= len;
4243 while (--width >= len) {
4244 --rescnt;
4245 *res++ = ' ';
4246 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004247 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004248 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004249 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004250 goto error;
4251 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004252 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004253 } /* '%' */
4254 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004255 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004256 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004257 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004258 goto error;
4259 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004260 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004261 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004262 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004263 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004264 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004265
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004266#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004267 unicode:
4268 if (args_owned) {
4269 Py_DECREF(args);
4270 args_owned = 0;
4271 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004272 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004273 if (PyTuple_Check(orig_args) && argidx > 0) {
4274 PyObject *v;
4275 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4276 v = PyTuple_New(n);
4277 if (v == NULL)
4278 goto error;
4279 while (--n >= 0) {
4280 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4281 Py_INCREF(w);
4282 PyTuple_SET_ITEM(v, n, w);
4283 }
4284 args = v;
4285 } else {
4286 Py_INCREF(orig_args);
4287 args = orig_args;
4288 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004289 args_owned = 1;
4290 /* Take what we have of the result and let the Unicode formatting
4291 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004292 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004293 if (_PyString_Resize(&result, rescnt))
4294 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004295 fmtcnt = PyString_GET_SIZE(format) - \
4296 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004297 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4298 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004299 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004300 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004301 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004302 if (v == NULL)
4303 goto error;
4304 /* Paste what we have (result) to what the Unicode formatting
4305 function returned (v) and return the result (or error) */
4306 w = PyUnicode_Concat(result, v);
4307 Py_DECREF(result);
4308 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004309 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004310 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004311#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004312
Guido van Rossume5372401993-03-16 12:15:04 +00004313 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004314 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004315 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004316 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004317 }
Guido van Rossume5372401993-03-16 12:15:04 +00004318 return NULL;
4319}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004320
Guido van Rossum2a61e741997-01-18 07:55:05 +00004321void
Fred Drakeba096332000-07-09 07:04:36 +00004322PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004323{
4324 register PyStringObject *s = (PyStringObject *)(*p);
4325 PyObject *t;
4326 if (s == NULL || !PyString_Check(s))
4327 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004328 /* If it's a string subclass, we don't really know what putting
4329 it in the interned dict might do. */
4330 if (!PyString_CheckExact(s))
4331 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004332 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004333 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004334 if (interned == NULL) {
4335 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004336 if (interned == NULL) {
4337 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004338 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004339 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004340 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004341 t = PyDict_GetItem(interned, (PyObject *)s);
4342 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004343 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004344 Py_DECREF(*p);
4345 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004346 return;
4347 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004348
Armin Rigo79f7ad22004-08-07 19:27:39 +00004349 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004350 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004351 return;
4352 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004353 /* The two references in interned are not counted by refcnt.
4354 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004355 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004356 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004357}
4358
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004359void
4360PyString_InternImmortal(PyObject **p)
4361{
4362 PyString_InternInPlace(p);
4363 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4364 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4365 Py_INCREF(*p);
4366 }
4367}
4368
Guido van Rossum2a61e741997-01-18 07:55:05 +00004369
4370PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004371PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004372{
4373 PyObject *s = PyString_FromString(cp);
4374 if (s == NULL)
4375 return NULL;
4376 PyString_InternInPlace(&s);
4377 return s;
4378}
4379
Guido van Rossum8cf04761997-08-02 02:57:45 +00004380void
Fred Drakeba096332000-07-09 07:04:36 +00004381PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004382{
4383 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004384 for (i = 0; i < UCHAR_MAX + 1; i++) {
4385 Py_XDECREF(characters[i]);
4386 characters[i] = NULL;
4387 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004388 Py_XDECREF(nullstring);
4389 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004390}
Barry Warsawa903ad982001-02-23 16:40:48 +00004391
Barry Warsawa903ad982001-02-23 16:40:48 +00004392void _Py_ReleaseInternedStrings(void)
4393{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004394 PyObject *keys;
4395 PyStringObject *s;
4396 int i, n;
4397
4398 if (interned == NULL || !PyDict_Check(interned))
4399 return;
4400 keys = PyDict_Keys(interned);
4401 if (keys == NULL || !PyList_Check(keys)) {
4402 PyErr_Clear();
4403 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004404 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004405
4406 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4407 detector, interned strings are not forcibly deallocated; rather, we
4408 give them their stolen references back, and then clear and DECREF
4409 the interned dict. */
4410
4411 fprintf(stderr, "releasing interned strings\n");
4412 n = PyList_GET_SIZE(keys);
4413 for (i = 0; i < n; i++) {
4414 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4415 switch (s->ob_sstate) {
4416 case SSTATE_NOT_INTERNED:
4417 /* XXX Shouldn't happen */
4418 break;
4419 case SSTATE_INTERNED_IMMORTAL:
4420 s->ob_refcnt += 1;
4421 break;
4422 case SSTATE_INTERNED_MORTAL:
4423 s->ob_refcnt += 2;
4424 break;
4425 default:
4426 Py_FatalError("Inconsistent interned string state.");
4427 }
4428 s->ob_sstate = SSTATE_NOT_INTERNED;
4429 }
4430 Py_DECREF(keys);
4431 PyDict_Clear(interned);
4432 Py_DECREF(interned);
4433 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004434}