blob: 3c1b303a503ed73c2155c9f561c33620c2be3d68 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not
   counting any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000159#ifdef __va_copy
160 __va_copy(count, vargs);
161#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000162 count = vargs;
163#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000164#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000165 /* step 1: figure out how large a buffer we need */
166 for (f = format; *f; f++) {
167 if (*f == '%') {
168 const char* p = f;
169 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
170 ;
171
172 /* skip the 'l' in %ld, since it doesn't change the
173 width. although only %d is supported (see
174 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000175 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000176 if (*f == 'l' && *(f+1) == 'd')
177 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000178
Barry Warsawdadace02001-08-24 18:32:06 +0000179 switch (*f) {
180 case 'c':
181 (void)va_arg(count, int);
182 /* fall through... */
183 case '%':
184 n++;
185 break;
186 case 'd': case 'i': case 'x':
187 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000188 /* 20 bytes is enough to hold a 64-bit
189 integer. Decimal takes the most space.
190 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000191 n += 20;
192 break;
193 case 's':
194 s = va_arg(count, char*);
195 n += strlen(s);
196 break;
197 case 'p':
198 (void) va_arg(count, int);
199 /* maximum 64-bit pointer representation:
200 * 0xffffffffffffffff
201 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000202 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000203 */
204 n += 19;
205 break;
206 default:
207 /* if we stumble upon an unknown
208 formatting code, copy the rest of
209 the format string to the output
210 string. (we cannot just skip the
211 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000212 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000213 n += strlen(p);
214 goto expand;
215 }
216 } else
217 n++;
218 }
219 expand:
220 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000221 /* Since we've analyzed how much space we need for the worst case,
222 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000223 string = PyString_FromStringAndSize(NULL, n);
224 if (!string)
225 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000226
Barry Warsawdadace02001-08-24 18:32:06 +0000227 s = PyString_AsString(string);
228
229 for (f = format; *f; f++) {
230 if (*f == '%') {
231 const char* p = f++;
232 int i, longflag = 0;
233 /* parse the width.precision part (we're only
234 interested in the precision value, if any) */
235 n = 0;
236 while (isdigit(Py_CHARMASK(*f)))
237 n = (n*10) + *f++ - '0';
238 if (*f == '.') {
239 f++;
240 n = 0;
241 while (isdigit(Py_CHARMASK(*f)))
242 n = (n*10) + *f++ - '0';
243 }
244 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
245 f++;
246 /* handle the long flag, but only for %ld. others
247 can be added when necessary. */
248 if (*f == 'l' && *(f+1) == 'd') {
249 longflag = 1;
250 ++f;
251 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000252
Barry Warsawdadace02001-08-24 18:32:06 +0000253 switch (*f) {
254 case 'c':
255 *s++ = va_arg(vargs, int);
256 break;
257 case 'd':
258 if (longflag)
259 sprintf(s, "%ld", va_arg(vargs, long));
260 else
261 sprintf(s, "%d", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'i':
265 sprintf(s, "%i", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 'x':
269 sprintf(s, "%x", va_arg(vargs, int));
270 s += strlen(s);
271 break;
272 case 's':
273 p = va_arg(vargs, char*);
274 i = strlen(p);
275 if (n > 0 && i > n)
276 i = n;
277 memcpy(s, p, i);
278 s += i;
279 break;
280 case 'p':
281 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000282 /* %p is ill-defined: ensure leading 0x. */
283 if (s[1] == 'X')
284 s[1] = 'x';
285 else if (s[1] != 'x') {
286 memmove(s+2, s, strlen(s)+1);
287 s[0] = '0';
288 s[1] = 'x';
289 }
Barry Warsawdadace02001-08-24 18:32:06 +0000290 s += strlen(s);
291 break;
292 case '%':
293 *s++ = '%';
294 break;
295 default:
296 strcpy(s, p);
297 s += strlen(s);
298 goto end;
299 }
300 } else
301 *s++ = *f;
302 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000303
Barry Warsawdadace02001-08-24 18:32:06 +0000304 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000305 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000306 return string;
307}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000308
Barry Warsawdadace02001-08-24 18:32:06 +0000309PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000310PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000311{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000312 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000313 va_list vargs;
314
315#ifdef HAVE_STDARG_PROTOTYPES
316 va_start(vargs, format);
317#else
318 va_start(vargs);
319#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000320 ret = PyString_FromFormatV(format, vargs);
321 va_end(vargs);
322 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000323}
324
325
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000326PyObject *PyString_Decode(const char *s,
327 int size,
328 const char *encoding,
329 const char *errors)
330{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000331 PyObject *v, *str;
332
333 str = PyString_FromStringAndSize(s, size);
334 if (str == NULL)
335 return NULL;
336 v = PyString_AsDecodedString(str, encoding, errors);
337 Py_DECREF(str);
338 return v;
339}
340
341PyObject *PyString_AsDecodedObject(PyObject *str,
342 const char *encoding,
343 const char *errors)
344{
345 PyObject *v;
346
347 if (!PyString_Check(str)) {
348 PyErr_BadArgument();
349 goto onError;
350 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000351
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000352 if (encoding == NULL) {
353#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000355#else
356 PyErr_SetString(PyExc_ValueError, "no encoding specified");
357 goto onError;
358#endif
359 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360
361 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000362 v = PyCodec_Decode(str, encoding, errors);
363 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365
366 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000367
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000368 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369 return NULL;
370}
371
372PyObject *PyString_AsDecodedString(PyObject *str,
373 const char *encoding,
374 const char *errors)
375{
376 PyObject *v;
377
378 v = PyString_AsDecodedObject(str, encoding, errors);
379 if (v == NULL)
380 goto onError;
381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000383 /* Convert Unicode to a string using the default encoding */
384 if (PyUnicode_Check(v)) {
385 PyObject *temp = v;
386 v = PyUnicode_AsEncodedString(v, NULL, NULL);
387 Py_DECREF(temp);
388 if (v == NULL)
389 goto onError;
390 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000391#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 if (!PyString_Check(v)) {
393 PyErr_Format(PyExc_TypeError,
394 "decoder did not return a string object (type=%.400s)",
395 v->ob_type->tp_name);
396 Py_DECREF(v);
397 goto onError;
398 }
399
400 return v;
401
402 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 return NULL;
404}
405
406PyObject *PyString_Encode(const char *s,
407 int size,
408 const char *encoding,
409 const char *errors)
410{
411 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000412
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000413 str = PyString_FromStringAndSize(s, size);
414 if (str == NULL)
415 return NULL;
416 v = PyString_AsEncodedString(str, encoding, errors);
417 Py_DECREF(str);
418 return v;
419}
420
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000422 const char *encoding,
423 const char *errors)
424{
425 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000426
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 if (!PyString_Check(str)) {
428 PyErr_BadArgument();
429 goto onError;
430 }
431
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000432 if (encoding == NULL) {
433#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000435#else
436 PyErr_SetString(PyExc_ValueError, "no encoding specified");
437 goto onError;
438#endif
439 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000440
441 /* Encode via the codec registry */
442 v = PyCodec_Encode(str, encoding, errors);
443 if (v == NULL)
444 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000445
446 return v;
447
448 onError:
449 return NULL;
450}
451
452PyObject *PyString_AsEncodedString(PyObject *str,
453 const char *encoding,
454 const char *errors)
455{
456 PyObject *v;
457
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000458 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000459 if (v == NULL)
460 goto onError;
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 /* Convert Unicode to a string using the default encoding */
464 if (PyUnicode_Check(v)) {
465 PyObject *temp = v;
466 v = PyUnicode_AsEncodedString(v, NULL, NULL);
467 Py_DECREF(temp);
468 if (v == NULL)
469 goto onError;
470 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000471#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 if (!PyString_Check(v)) {
473 PyErr_Format(PyExc_TypeError,
474 "encoder did not return a string object (type=%.400s)",
475 v->ob_type->tp_name);
476 Py_DECREF(v);
477 goto onError;
478 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000479
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000480 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000481
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000482 onError:
483 return NULL;
484}
485
Guido van Rossum234f9421993-06-17 12:35:49 +0000486static void
Fred Drakeba096332000-07-09 07:04:36 +0000487string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000488{
Guido van Rossum9475a232001-10-05 20:51:39 +0000489 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000490}
491
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000492static int
493string_getsize(register PyObject *op)
494{
495 char *s;
496 int len;
497 if (PyString_AsStringAndSize(op, &s, &len))
498 return -1;
499 return len;
500}
501
502static /*const*/ char *
503string_getbuffer(register PyObject *op)
504{
505 char *s;
506 int len;
507 if (PyString_AsStringAndSize(op, &s, &len))
508 return NULL;
509 return s;
510}
511
Guido van Rossumd7047b31995-01-02 19:07:15 +0000512int
Fred Drakeba096332000-07-09 07:04:36 +0000513PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000515 if (!PyString_Check(op))
516 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000517 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518}
519
520/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000521PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000523 if (!PyString_Check(op))
524 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000525 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000526}
527
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000528int
529PyString_AsStringAndSize(register PyObject *obj,
530 register char **s,
531 register int *len)
532{
533 if (s == NULL) {
534 PyErr_BadInternalCall();
535 return -1;
536 }
537
538 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000539#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000540 if (PyUnicode_Check(obj)) {
541 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
542 if (obj == NULL)
543 return -1;
544 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000545 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000546#endif
547 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000548 PyErr_Format(PyExc_TypeError,
549 "expected string or Unicode object, "
550 "%.200s found", obj->ob_type->tp_name);
551 return -1;
552 }
553 }
554
555 *s = PyString_AS_STRING(obj);
556 if (len != NULL)
557 *len = PyString_GET_SIZE(obj);
558 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
559 PyErr_SetString(PyExc_TypeError,
560 "expected string without null bytes");
561 return -1;
562 }
563 return 0;
564}
565
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566/* Methods */
567
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000568static int
Fred Drakeba096332000-07-09 07:04:36 +0000569string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000570{
571 int i;
572 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000573 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000574
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000575 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000576 if (! PyString_CheckExact(op)) {
577 int ret;
578 /* A str subclass may have its own __str__ method. */
579 op = (PyStringObject *) PyObject_Str((PyObject *)op);
580 if (op == NULL)
581 return -1;
582 ret = string_print(op, fp, flags);
583 Py_DECREF(op);
584 return ret;
585 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000587 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000588 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000589 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000590
Thomas Wouters7e474022000-07-16 12:04:32 +0000591 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000592 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000593 if (memchr(op->ob_sval, '\'', op->ob_size) &&
594 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000595 quote = '"';
596
597 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000598 for (i = 0; i < op->ob_size; i++) {
599 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000600 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000601 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000602 else if (c == '\t')
603 fprintf(fp, "\\t");
604 else if (c == '\n')
605 fprintf(fp, "\\n");
606 else if (c == '\r')
607 fprintf(fp, "\\r");
608 else if (c < ' ' || c >= 0x7f)
609 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000610 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000611 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000612 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000613 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000614 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615}
616
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000617static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000618string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000619{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000620 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
621 PyObject *v;
622 if (newsize > INT_MAX) {
623 PyErr_SetString(PyExc_OverflowError,
624 "string is too large to make repr");
625 }
626 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000627 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000628 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000629 }
630 else {
631 register int i;
632 register char c;
633 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000634 int quote;
635
Thomas Wouters7e474022000-07-16 12:04:32 +0000636 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000637 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000638 if (memchr(op->ob_sval, '\'', op->ob_size) &&
639 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000640 quote = '"';
641
Tim Peters9161c8b2001-12-03 01:55:38 +0000642 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000643 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000645 /* There's at least enough room for a hex escape
646 and a closing quote. */
647 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000649 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000651 else if (c == '\t')
652 *p++ = '\\', *p++ = 't';
653 else if (c == '\n')
654 *p++ = '\\', *p++ = 'n';
655 else if (c == '\r')
656 *p++ = '\\', *p++ = 'r';
657 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000658 /* For performance, we don't want to call
659 PyOS_snprintf here (extra layers of
660 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000661 sprintf(p, "\\x%02x", c & 0xff);
662 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663 }
664 else
665 *p++ = c;
666 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000667 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000668 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000670 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000671 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000672 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000673 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000674}
675
Guido van Rossum189f1df2001-05-01 16:51:53 +0000676static PyObject *
677string_str(PyObject *s)
678{
Tim Petersc9933152001-10-16 20:18:24 +0000679 assert(PyString_Check(s));
680 if (PyString_CheckExact(s)) {
681 Py_INCREF(s);
682 return s;
683 }
684 else {
685 /* Subtype -- return genuine string with the same value. */
686 PyStringObject *t = (PyStringObject *) s;
687 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
688 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000689}
690
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000691static int
Fred Drakeba096332000-07-09 07:04:36 +0000692string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693{
694 return a->ob_size;
695}
696
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000698string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699{
700 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 register PyStringObject *op;
702 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000703#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000704 if (PyUnicode_Check(bb))
705 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000706#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000707 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000708 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000709 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000710 return NULL;
711 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000712#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000713 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000714 if ((a->ob_size == 0 || b->ob_size == 0) &&
715 PyString_CheckExact(a) && PyString_CheckExact(b)) {
716 if (a->ob_size == 0) {
717 Py_INCREF(bb);
718 return bb;
719 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 Py_INCREF(a);
721 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722 }
723 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000724 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000726 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000727 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000729 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000730 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000731 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000732 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
733 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
734 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736#undef b
737}
738
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000739static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000740string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741{
742 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000743 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000744 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000745 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746 if (n < 0)
747 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000748 /* watch out for overflows: the size can overflow int,
749 * and the # of bytes needed can overflow size_t
750 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000752 if (n && size / n != a->ob_size) {
753 PyErr_SetString(PyExc_OverflowError,
754 "repeated string is too long");
755 return NULL;
756 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000757 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000758 Py_INCREF(a);
759 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000760 }
Tim Peters8f422462000-09-09 06:13:41 +0000761 nbytes = size * sizeof(char);
762 if (nbytes / sizeof(char) != (size_t)size ||
763 nbytes + sizeof(PyStringObject) <= nbytes) {
764 PyErr_SetString(PyExc_OverflowError,
765 "repeated string is too long");
766 return NULL;
767 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000768 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000769 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000770 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000771 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000772 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000773 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000774 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000775 for (i = 0; i < size; i += a->ob_size)
776 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
777 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000778 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779}
780
781/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
782
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000783static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000784string_slice(register PyStringObject *a, register int i, register int j)
785 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000786{
787 if (i < 0)
788 i = 0;
789 if (j < 0)
790 j = 0; /* Avoid signed/unsigned bug in next line */
791 if (j > a->ob_size)
792 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000793 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
794 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000795 Py_INCREF(a);
796 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 }
798 if (j < i)
799 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000800 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000801}
802
Guido van Rossum9284a572000-03-07 15:53:43 +0000803static int
Fred Drakeba096332000-07-09 07:04:36 +0000804string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000805{
806 register char *s, *end;
807 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000808#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000809 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000810 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000811#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000812 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000813 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000814 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000815 return -1;
816 }
817 c = PyString_AsString(el)[0];
818 s = PyString_AsString(a);
819 end = s + PyString_Size(a);
820 while (s < end) {
821 if (c == *s++)
822 return 1;
823 }
824 return 0;
825}
826
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000827static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000828string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000830 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000831 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000833 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 return NULL;
835 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000836 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000837 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000838 if (v == NULL)
839 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000840 else {
841#ifdef COUNT_ALLOCS
842 one_strings++;
843#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000844 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000845 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000846 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847}
848
Martin v. Löwiscd353062001-05-24 16:56:35 +0000849static PyObject*
850string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000852 int c;
853 int len_a, len_b;
854 int min_len;
855 PyObject *result;
856
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000857 /* Make sure both arguments are strings. */
858 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000859 result = Py_NotImplemented;
860 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000861 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000862 if (a == b) {
863 switch (op) {
864 case Py_EQ:case Py_LE:case Py_GE:
865 result = Py_True;
866 goto out;
867 case Py_NE:case Py_LT:case Py_GT:
868 result = Py_False;
869 goto out;
870 }
871 }
872 if (op == Py_EQ) {
873 /* Supporting Py_NE here as well does not save
874 much time, since Py_NE is rarely used. */
875 if (a->ob_size == b->ob_size
876 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000877 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +0000878 a->ob_size) == 0)) {
879 result = Py_True;
880 } else {
881 result = Py_False;
882 }
883 goto out;
884 }
885 len_a = a->ob_size; len_b = b->ob_size;
886 min_len = (len_a < len_b) ? len_a : len_b;
887 if (min_len > 0) {
888 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
889 if (c==0)
890 c = memcmp(a->ob_sval, b->ob_sval, min_len);
891 }else
892 c = 0;
893 if (c == 0)
894 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
895 switch (op) {
896 case Py_LT: c = c < 0; break;
897 case Py_LE: c = c <= 0; break;
898 case Py_EQ: assert(0); break; /* unreachable */
899 case Py_NE: c = c != 0; break;
900 case Py_GT: c = c > 0; break;
901 case Py_GE: c = c >= 0; break;
902 default:
903 result = Py_NotImplemented;
904 goto out;
905 }
906 result = c ? Py_True : Py_False;
907 out:
908 Py_INCREF(result);
909 return result;
910}
911
912int
913_PyString_Eq(PyObject *o1, PyObject *o2)
914{
915 PyStringObject *a, *b;
916 a = (PyStringObject*)o1;
917 b = (PyStringObject*)o2;
918 return a->ob_size == b->ob_size
919 && *a->ob_sval == *b->ob_sval
920 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000921}
922
Guido van Rossum9bfef441993-03-29 10:43:31 +0000923static long
Fred Drakeba096332000-07-09 07:04:36 +0000924string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000925{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000926 register int len;
927 register unsigned char *p;
928 register long x;
929
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000930 if (a->ob_shash != -1)
931 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000932 if (a->ob_sinterned != NULL)
933 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000935 len = a->ob_size;
936 p = (unsigned char *) a->ob_sval;
937 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000938 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000939 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000940 x ^= a->ob_size;
941 if (x == -1)
942 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000943 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000944 return x;
945}
946
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000947static PyObject*
948string_subscript(PyStringObject* self, PyObject* item)
949{
950 if (PyInt_Check(item)) {
951 long i = PyInt_AS_LONG(item);
952 if (i < 0)
953 i += PyString_GET_SIZE(self);
954 return string_item(self,i);
955 }
956 else if (PyLong_Check(item)) {
957 long i = PyLong_AsLong(item);
958 if (i == -1 && PyErr_Occurred())
959 return NULL;
960 if (i < 0)
961 i += PyString_GET_SIZE(self);
962 return string_item(self,i);
963 }
964 else if (PySlice_Check(item)) {
965 int start, stop, step, slicelength, cur, i;
966 char* source_buf;
967 char* result_buf;
968 PyObject* result;
969
970 if (PySlice_GetIndicesEx((PySliceObject*)item,
971 PyString_GET_SIZE(self),
972 &start, &stop, &step, &slicelength) < 0) {
973 return NULL;
974 }
975
976 if (slicelength <= 0) {
977 return PyString_FromStringAndSize("", 0);
978 }
979 else {
980 source_buf = PyString_AsString((PyObject*)self);
981 result_buf = PyMem_Malloc(slicelength);
982
983 for (cur = start, i = 0; i < slicelength;
984 cur += step, i++) {
985 result_buf[i] = source_buf[cur];
986 }
987
988 result = PyString_FromStringAndSize(result_buf,
989 slicelength);
990 PyMem_Free(result_buf);
991 return result;
992 }
993 }
994 else {
995 PyErr_SetString(PyExc_TypeError,
996 "string indices must be integers");
997 return NULL;
998 }
999}
1000
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001001static int
Fred Drakeba096332000-07-09 07:04:36 +00001002string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001003{
1004 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001005 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001006 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001007 return -1;
1008 }
1009 *ptr = (void *)self->ob_sval;
1010 return self->ob_size;
1011}
1012
/* Write-buffer slot: always fails.  Sets TypeError and returns -1
 * without touching *ptr, regardless of the segment index.
 */
static int
string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
{
    PyErr_SetString(PyExc_TypeError,
                    "Cannot use string as modifiable buffer");
    return -1;
}
1020
1021static int
Fred Drakeba096332000-07-09 07:04:36 +00001022string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001023{
1024 if ( lenp )
1025 *lenp = self->ob_size;
1026 return 1;
1027}
1028
Guido van Rossum1db70701998-10-08 02:18:52 +00001029static int
Fred Drakeba096332000-07-09 07:04:36 +00001030string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001031{
1032 if ( index != 0 ) {
1033 PyErr_SetString(PyExc_SystemError,
1034 "accessing non-existent string segment");
1035 return -1;
1036 }
1037 *ptr = self->ob_sval;
1038 return self->ob_size;
1039}
1040
/* Sequence-protocol slot table for strings.  The two assignment slots
 * are left empty (0); the remaining slots point at the functions in
 * this file.
 */
static PySequenceMethods string_as_sequence = {
    (inquiry)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (intargfunc)string_repeat, /*sq_repeat*/
    (intargfunc)string_item, /*sq_item*/
    (intintargfunc)string_slice, /*sq_slice*/
    0,      /*sq_ass_item*/
    0,      /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1051
/* Mapping-protocol slot table for strings: a length function, the
 * subscript function (string_subscript), and an empty third slot.
 */
static PyMappingMethods string_as_mapping = {
    (inquiry)string_length,
    (binaryfunc)string_subscript,
    0,
};
1057
/* Buffer-protocol slot table for strings: read buffer, write buffer
 * (which always fails), segment count, and character buffer.
 */
static PyBufferProcs string_as_buffer = {
    (getreadbufferproc)string_buffer_getreadbuf,
    (getwritebufferproc)string_buffer_getwritebuf,
    (getsegcountproc)string_buffer_getsegcount,
    (getcharbufferproc)string_buffer_getcharbuf,
};
1064
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065
1066
/* Strip modes; also used as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* PyArg_ParseTuple format strings, one per strip mode. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip mode i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001075
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001076
1077static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001078split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001079{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001080 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001081 PyObject* item;
1082 PyObject *list = PyList_New(0);
1083
1084 if (list == NULL)
1085 return NULL;
1086
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087 for (i = j = 0; i < len; ) {
1088 while (i < len && isspace(Py_CHARMASK(s[i])))
1089 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001090 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001091 while (i < len && !isspace(Py_CHARMASK(s[i])))
1092 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001093 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001094 if (maxsplit-- <= 0)
1095 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001096 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1097 if (item == NULL)
1098 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001099 err = PyList_Append(list, item);
1100 Py_DECREF(item);
1101 if (err < 0)
1102 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001103 while (i < len && isspace(Py_CHARMASK(s[i])))
1104 i++;
1105 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001106 }
1107 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001108 if (j < len) {
1109 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1110 if (item == NULL)
1111 goto finally;
1112 err = PyList_Append(list, item);
1113 Py_DECREF(item);
1114 if (err < 0)
1115 goto finally;
1116 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001117 return list;
1118 finally:
1119 Py_DECREF(list);
1120 return NULL;
1121}
1122
1123
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001124PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001125"S.split([sep [,maxsplit]]) -> list of strings\n\
1126\n\
1127Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001128delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001129splits are done. If sep is not specified or is None, any\n\
1130whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001131
1132static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001133string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001134{
1135 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001136 int maxsplit = -1;
1137 const char *s = PyString_AS_STRING(self), *sub;
1138 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001139
Guido van Rossum4c08d552000-03-10 22:55:18 +00001140 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001142 if (maxsplit < 0)
1143 maxsplit = INT_MAX;
1144 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001145 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001146 if (PyString_Check(subobj)) {
1147 sub = PyString_AS_STRING(subobj);
1148 n = PyString_GET_SIZE(subobj);
1149 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001150#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001151 else if (PyUnicode_Check(subobj))
1152 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001153#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001154 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1155 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001156 if (n == 0) {
1157 PyErr_SetString(PyExc_ValueError, "empty separator");
1158 return NULL;
1159 }
1160
1161 list = PyList_New(0);
1162 if (list == NULL)
1163 return NULL;
1164
1165 i = j = 0;
1166 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001167 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001168 if (maxsplit-- <= 0)
1169 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001170 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1171 if (item == NULL)
1172 goto fail;
1173 err = PyList_Append(list, item);
1174 Py_DECREF(item);
1175 if (err < 0)
1176 goto fail;
1177 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001178 }
1179 else
1180 i++;
1181 }
1182 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1183 if (item == NULL)
1184 goto fail;
1185 err = PyList_Append(list, item);
1186 Py_DECREF(item);
1187 if (err < 0)
1188 goto fail;
1189
1190 return list;
1191
1192 fail:
1193 Py_DECREF(list);
1194 return NULL;
1195}
1196
1197
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001198PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001199"S.join(sequence) -> string\n\
1200\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001201Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001202sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001203
1204static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001205string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001206{
1207 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001208 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001209 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001210 char *p;
1211 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001212 size_t sz = 0;
1213 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001214 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001215
Tim Peters19fe14e2001-01-19 03:03:47 +00001216 seq = PySequence_Fast(orig, "");
1217 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001218 if (PyErr_ExceptionMatches(PyExc_TypeError))
1219 PyErr_Format(PyExc_TypeError,
1220 "sequence expected, %.80s found",
1221 orig->ob_type->tp_name);
1222 return NULL;
1223 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001224
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001225 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001226 if (seqlen == 0) {
1227 Py_DECREF(seq);
1228 return PyString_FromString("");
1229 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001230 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001231 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001232 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1233 PyErr_Format(PyExc_TypeError,
1234 "sequence item 0: expected string,"
1235 " %.80s found",
1236 item->ob_type->tp_name);
1237 Py_DECREF(seq);
1238 return NULL;
1239 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001240 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001241 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001242 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001243 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001244
Tim Peters19fe14e2001-01-19 03:03:47 +00001245 /* There are at least two things to join. Do a pre-pass to figure out
1246 * the total amount of space we'll need (sz), see whether any argument
1247 * is absurd, and defer to the Unicode join if appropriate.
1248 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001249 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001250 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001251 item = PySequence_Fast_GET_ITEM(seq, i);
1252 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001253#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001254 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001255 /* Defer to Unicode join.
1256 * CAUTION: There's no gurantee that the
1257 * original sequence can be iterated over
1258 * again, so we must pass seq here.
1259 */
1260 PyObject *result;
1261 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001262 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001263 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001264 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001265#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001266 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001267 "sequence item %i: expected string,"
1268 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001269 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001270 Py_DECREF(seq);
1271 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001272 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001273 sz += PyString_GET_SIZE(item);
1274 if (i != 0)
1275 sz += seplen;
1276 if (sz < old_sz || sz > INT_MAX) {
1277 PyErr_SetString(PyExc_OverflowError,
1278 "join() is too long for a Python string");
1279 Py_DECREF(seq);
1280 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001282 }
1283
1284 /* Allocate result space. */
1285 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1286 if (res == NULL) {
1287 Py_DECREF(seq);
1288 return NULL;
1289 }
1290
1291 /* Catenate everything. */
1292 p = PyString_AS_STRING(res);
1293 for (i = 0; i < seqlen; ++i) {
1294 size_t n;
1295 item = PySequence_Fast_GET_ITEM(seq, i);
1296 n = PyString_GET_SIZE(item);
1297 memcpy(p, PyString_AS_STRING(item), n);
1298 p += n;
1299 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001300 memcpy(p, sep, seplen);
1301 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001302 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001304
Jeremy Hylton49048292000-07-11 03:28:17 +00001305 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001306 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001307}
1308
Tim Peters52e155e2001-06-16 05:42:57 +00001309PyObject *
1310_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001311{
Tim Petersa7259592001-06-16 05:11:17 +00001312 assert(sep != NULL && PyString_Check(sep));
1313 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001314 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001315}
1316
/* Normalize a start/end index pair in place for a sequence of length
 * len.  Negative values are taken relative to the end and then clamped
 * at 0; end is additionally capped at len.  start is NOT capped at len.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int lo = *start;
    int hi = *end;

    if (hi > len) {
        hi = len;
    }
    else if (hi < 0) {
        hi += len;
    }
    if (hi < 0) {
        hi = 0;
    }

    if (lo < 0) {
        lo += len;
    }
    if (lo < 0) {
        lo = 0;
    }

    *start = lo;
    *end = hi;
}
1331
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332static long
Fred Drakeba096332000-07-09 07:04:36 +00001333string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336 int len = PyString_GET_SIZE(self);
1337 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001338 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001340 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001341 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001342 return -2;
1343 if (PyString_Check(subobj)) {
1344 sub = PyString_AS_STRING(subobj);
1345 n = PyString_GET_SIZE(subobj);
1346 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001347#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 else if (PyUnicode_Check(subobj))
1349 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001350#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001351 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352 return -2;
1353
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001354 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001355
Guido van Rossum4c08d552000-03-10 22:55:18 +00001356 if (dir > 0) {
1357 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 last -= n;
1360 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001361 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001362 return (long)i;
1363 }
1364 else {
1365 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001366
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 if (n == 0 && i <= last)
1368 return (long)last;
1369 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001370 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001371 return (long)j;
1372 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001373
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374 return -1;
1375}
1376
1377
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001378PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379"S.find(sub [,start [,end]]) -> int\n\
1380\n\
1381Return the lowest index in S where substring sub is found,\n\
1382such that sub is contained within s[start,end]. Optional\n\
1383arguments start and end are interpreted as in slice notation.\n\
1384\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001385Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001386
1387static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001388string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 if (result == -2)
1392 return NULL;
1393 return PyInt_FromLong(result);
1394}
1395
1396
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001397PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398"S.index(sub [,start [,end]]) -> int\n\
1399\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001400Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401
1402static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001403string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001405 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406 if (result == -2)
1407 return NULL;
1408 if (result == -1) {
1409 PyErr_SetString(PyExc_ValueError,
1410 "substring not found in string.index");
1411 return NULL;
1412 }
1413 return PyInt_FromLong(result);
1414}
1415
1416
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001417PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418"S.rfind(sub [,start [,end]]) -> int\n\
1419\n\
1420Return the highest index in S where substring sub is found,\n\
1421such that sub is contained within s[start,end]. Optional\n\
1422arguments start and end are interpreted as in slice notation.\n\
1423\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001424Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425
1426static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001427string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001428{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001429 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001430 if (result == -2)
1431 return NULL;
1432 return PyInt_FromLong(result);
1433}
1434
1435
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001436PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437"S.rindex(sub [,start [,end]]) -> int\n\
1438\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001439Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440
1441static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001442string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001444 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445 if (result == -2)
1446 return NULL;
1447 if (result == -1) {
1448 PyErr_SetString(PyExc_ValueError,
1449 "substring not found in string.rindex");
1450 return NULL;
1451 }
1452 return PyInt_FromLong(result);
1453}
1454
1455
1456static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001457do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1458{
1459 char *s = PyString_AS_STRING(self);
1460 int len = PyString_GET_SIZE(self);
1461 char *sep = PyString_AS_STRING(sepobj);
1462 int seplen = PyString_GET_SIZE(sepobj);
1463 int i, j;
1464
1465 i = 0;
1466 if (striptype != RIGHTSTRIP) {
1467 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1468 i++;
1469 }
1470 }
1471
1472 j = len;
1473 if (striptype != LEFTSTRIP) {
1474 do {
1475 j--;
1476 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1477 j++;
1478 }
1479
1480 if (i == 0 && j == len && PyString_CheckExact(self)) {
1481 Py_INCREF(self);
1482 return (PyObject*)self;
1483 }
1484 else
1485 return PyString_FromStringAndSize(s+i, j-i);
1486}
1487
1488
1489static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001490do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491{
1492 char *s = PyString_AS_STRING(self);
1493 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 i = 0;
1496 if (striptype != RIGHTSTRIP) {
1497 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1498 i++;
1499 }
1500 }
1501
1502 j = len;
1503 if (striptype != LEFTSTRIP) {
1504 do {
1505 j--;
1506 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1507 j++;
1508 }
1509
Tim Peters8fa5dd02001-09-12 02:18:30 +00001510 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511 Py_INCREF(self);
1512 return (PyObject*)self;
1513 }
1514 else
1515 return PyString_FromStringAndSize(s+i, j-i);
1516}
1517
1518
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001519static PyObject *
1520do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1521{
1522 PyObject *sep = NULL;
1523
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001524 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001525 return NULL;
1526
1527 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001528 if (PyString_Check(sep))
1529 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001530#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001531 else if (PyUnicode_Check(sep)) {
1532 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1533 PyObject *res;
1534 if (uniself==NULL)
1535 return NULL;
1536 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1537 striptype, sep);
1538 Py_DECREF(uniself);
1539 return res;
1540 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001541#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001542 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001543 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001544#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001545 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001546#else
1547 "%s arg must be None or str",
1548#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001549 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001550 return NULL;
1551 }
1552 return do_xstrip(self, striptype, sep);
1553 }
1554
1555 return do_strip(self, striptype);
1556}
1557
1558
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001559PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001560"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001561\n\
1562Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001563whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001564If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001565If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566
1567static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001568string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001570 if (PyTuple_GET_SIZE(args) == 0)
1571 return do_strip(self, BOTHSTRIP); /* Common case */
1572 else
1573 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001574}
1575
1576
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001577PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001578"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001580Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001581If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001582If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583
1584static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001585string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001587 if (PyTuple_GET_SIZE(args) == 0)
1588 return do_strip(self, LEFTSTRIP); /* Common case */
1589 else
1590 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591}
1592
1593
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001594PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001595"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001597Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001598If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001599If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600
1601static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001602string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001604 if (PyTuple_GET_SIZE(args) == 0)
1605 return do_strip(self, RIGHTSTRIP); /* Common case */
1606 else
1607 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608}
1609
1610
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001611PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612"S.lower() -> string\n\
1613\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001614Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615
1616static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001617string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618{
1619 char *s = PyString_AS_STRING(self), *s_new;
1620 int i, n = PyString_GET_SIZE(self);
1621 PyObject *new;
1622
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 new = PyString_FromStringAndSize(NULL, n);
1624 if (new == NULL)
1625 return NULL;
1626 s_new = PyString_AsString(new);
1627 for (i = 0; i < n; i++) {
1628 int c = Py_CHARMASK(*s++);
1629 if (isupper(c)) {
1630 *s_new = tolower(c);
1631 } else
1632 *s_new = c;
1633 s_new++;
1634 }
1635 return new;
1636}
1637
1638
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001639PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640"S.upper() -> string\n\
1641\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001642Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643
1644static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001645string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646{
1647 char *s = PyString_AS_STRING(self), *s_new;
1648 int i, n = PyString_GET_SIZE(self);
1649 PyObject *new;
1650
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651 new = PyString_FromStringAndSize(NULL, n);
1652 if (new == NULL)
1653 return NULL;
1654 s_new = PyString_AsString(new);
1655 for (i = 0; i < n; i++) {
1656 int c = Py_CHARMASK(*s++);
1657 if (islower(c)) {
1658 *s_new = toupper(c);
1659 } else
1660 *s_new = c;
1661 s_new++;
1662 }
1663 return new;
1664}
1665
1666
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001667PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668"S.title() -> string\n\
1669\n\
1670Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001671characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001672
1673static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001674string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001675{
1676 char *s = PyString_AS_STRING(self), *s_new;
1677 int i, n = PyString_GET_SIZE(self);
1678 int previous_is_cased = 0;
1679 PyObject *new;
1680
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 new = PyString_FromStringAndSize(NULL, n);
1682 if (new == NULL)
1683 return NULL;
1684 s_new = PyString_AsString(new);
1685 for (i = 0; i < n; i++) {
1686 int c = Py_CHARMASK(*s++);
1687 if (islower(c)) {
1688 if (!previous_is_cased)
1689 c = toupper(c);
1690 previous_is_cased = 1;
1691 } else if (isupper(c)) {
1692 if (previous_is_cased)
1693 c = tolower(c);
1694 previous_is_cased = 1;
1695 } else
1696 previous_is_cased = 0;
1697 *s_new++ = c;
1698 }
1699 return new;
1700}
1701
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001702PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703"S.capitalize() -> string\n\
1704\n\
1705Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001706capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707
1708static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001709string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710{
1711 char *s = PyString_AS_STRING(self), *s_new;
1712 int i, n = PyString_GET_SIZE(self);
1713 PyObject *new;
1714
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715 new = PyString_FromStringAndSize(NULL, n);
1716 if (new == NULL)
1717 return NULL;
1718 s_new = PyString_AsString(new);
1719 if (0 < n) {
1720 int c = Py_CHARMASK(*s++);
1721 if (islower(c))
1722 *s_new = toupper(c);
1723 else
1724 *s_new = c;
1725 s_new++;
1726 }
1727 for (i = 1; i < n; i++) {
1728 int c = Py_CHARMASK(*s++);
1729 if (isupper(c))
1730 *s_new = tolower(c);
1731 else
1732 *s_new = c;
1733 s_new++;
1734 }
1735 return new;
1736}
1737
1738
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001739PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740"S.count(sub[, start[, end]]) -> int\n\
1741\n\
1742Return the number of occurrences of substring sub in string\n\
1743S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001744interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745
1746static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001747string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001749 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750 int len = PyString_GET_SIZE(self), n;
1751 int i = 0, last = INT_MAX;
1752 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754
Guido van Rossumc6821402000-05-08 14:08:05 +00001755 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1756 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001758
Guido van Rossum4c08d552000-03-10 22:55:18 +00001759 if (PyString_Check(subobj)) {
1760 sub = PyString_AS_STRING(subobj);
1761 n = PyString_GET_SIZE(subobj);
1762 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001763#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001764 else if (PyUnicode_Check(subobj)) {
1765 int count;
1766 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1767 if (count == -1)
1768 return NULL;
1769 else
1770 return PyInt_FromLong((long) count);
1771 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001772#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001773 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1774 return NULL;
1775
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001776 string_adjust_indices(&i, &last, len);
1777
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778 m = last + 1 - n;
1779 if (n == 0)
1780 return PyInt_FromLong((long) (m-i));
1781
1782 r = 0;
1783 while (i < m) {
1784 if (!memcmp(s+i, sub, n)) {
1785 r++;
1786 i += n;
1787 } else {
1788 i++;
1789 }
1790 }
1791 return PyInt_FromLong((long) r);
1792}
1793
1794
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796"S.swapcase() -> string\n\
1797\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001798Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001799converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800
1801static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001802string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803{
1804 char *s = PyString_AS_STRING(self), *s_new;
1805 int i, n = PyString_GET_SIZE(self);
1806 PyObject *new;
1807
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808 new = PyString_FromStringAndSize(NULL, n);
1809 if (new == NULL)
1810 return NULL;
1811 s_new = PyString_AsString(new);
1812 for (i = 0; i < n; i++) {
1813 int c = Py_CHARMASK(*s++);
1814 if (islower(c)) {
1815 *s_new = toupper(c);
1816 }
1817 else if (isupper(c)) {
1818 *s_new = tolower(c);
1819 }
1820 else
1821 *s_new = c;
1822 s_new++;
1823 }
1824 return new;
1825}
1826
1827
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001828PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829"S.translate(table [,deletechars]) -> string\n\
1830\n\
1831Return a copy of the string S, where all characters occurring\n\
1832in the optional argument deletechars are removed, and the\n\
1833remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001834translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835
1836static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001837string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001839 register char *input, *output;
1840 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841 register int i, c, changed = 0;
1842 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001843 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844 int inlen, tablen, dellen = 0;
1845 PyObject *result;
1846 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001847 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848
Guido van Rossum4c08d552000-03-10 22:55:18 +00001849 if (!PyArg_ParseTuple(args, "O|O:translate",
1850 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001852
1853 if (PyString_Check(tableobj)) {
1854 table1 = PyString_AS_STRING(tableobj);
1855 tablen = PyString_GET_SIZE(tableobj);
1856 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001857#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001859 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001860 parameter; instead a mapping to None will cause characters
1861 to be deleted. */
1862 if (delobj != NULL) {
1863 PyErr_SetString(PyExc_TypeError,
1864 "deletions are implemented differently for unicode");
1865 return NULL;
1866 }
1867 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1868 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001869#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001871 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001872
1873 if (delobj != NULL) {
1874 if (PyString_Check(delobj)) {
1875 del_table = PyString_AS_STRING(delobj);
1876 dellen = PyString_GET_SIZE(delobj);
1877 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001878#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 else if (PyUnicode_Check(delobj)) {
1880 PyErr_SetString(PyExc_TypeError,
1881 "deletions are implemented differently for unicode");
1882 return NULL;
1883 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001884#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1886 return NULL;
1887
1888 if (tablen != 256) {
1889 PyErr_SetString(PyExc_ValueError,
1890 "translation table must be 256 characters long");
1891 return NULL;
1892 }
1893 }
1894 else {
1895 del_table = NULL;
1896 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897 }
1898
1899 table = table1;
1900 inlen = PyString_Size(input_obj);
1901 result = PyString_FromStringAndSize((char *)NULL, inlen);
1902 if (result == NULL)
1903 return NULL;
1904 output_start = output = PyString_AsString(result);
1905 input = PyString_AsString(input_obj);
1906
1907 if (dellen == 0) {
1908 /* If no deletions are required, use faster code */
1909 for (i = inlen; --i >= 0; ) {
1910 c = Py_CHARMASK(*input++);
1911 if (Py_CHARMASK((*output++ = table[c])) != c)
1912 changed = 1;
1913 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001914 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915 return result;
1916 Py_DECREF(result);
1917 Py_INCREF(input_obj);
1918 return input_obj;
1919 }
1920
1921 for (i = 0; i < 256; i++)
1922 trans_table[i] = Py_CHARMASK(table[i]);
1923
1924 for (i = 0; i < dellen; i++)
1925 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1926
1927 for (i = inlen; --i >= 0; ) {
1928 c = Py_CHARMASK(*input++);
1929 if (trans_table[c] != -1)
1930 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1931 continue;
1932 changed = 1;
1933 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001934 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935 Py_DECREF(result);
1936 Py_INCREF(input_obj);
1937 return input_obj;
1938 }
1939 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001940 if (inlen > 0)
1941 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 return result;
1943}
1944
1945
1946/* What follows is used for implementing replace(). Perry Stoll. */
1947
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  when there is none.  A pattern longer than MEM can never match, so
  that case also yields -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match has to start at or before this index to fit inside MEM. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before paying for the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
1973
/*
   mymemcnt

   Return the number of non-overlapping occurrences of PAT in MEM,
   found by repeated left-to-right searches with mymemfind():
   mem=1111 and pat==11 returns 2.
   mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    while (len >= 0) {
        const int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* Resume the search just past the end of this match. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1997
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means no limit.

   If STR is empty, the length of PAT is greater than the length of STR,
   or there are no occurrences of PAT to replace, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the output length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* len + nfound*(sub_len - pat_len) can overflow an int when the
       substitution is longer than the pattern.  That would leave new_len
       too small (or negative) for the memcpy calls below, so treat it as
       an allocation failure. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2082
2083
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002084PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085"S.replace (old, new[, maxsplit]) -> string\n\
2086\n\
2087Return a copy of string S with all occurrences of substring\n\
2088old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002089given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090
2091static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002092string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002094 const char *str = PyString_AS_STRING(self), *sub, *repl;
2095 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002096 const int len = PyString_GET_SIZE(self);
2097 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002100 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101
Guido van Rossum4c08d552000-03-10 22:55:18 +00002102 if (!PyArg_ParseTuple(args, "OO|i:replace",
2103 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002105
2106 if (PyString_Check(subobj)) {
2107 sub = PyString_AS_STRING(subobj);
2108 sub_len = PyString_GET_SIZE(subobj);
2109 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002110#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002112 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002114#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002115 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2116 return NULL;
2117
2118 if (PyString_Check(replobj)) {
2119 repl = PyString_AS_STRING(replobj);
2120 repl_len = PyString_GET_SIZE(replobj);
2121 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002122#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002123 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002124 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002125 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002126#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002127 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2128 return NULL;
2129
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002130 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002131 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132 return NULL;
2133 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135 if (new_s == NULL) {
2136 PyErr_NoMemory();
2137 return NULL;
2138 }
2139 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002140 if (PyString_CheckExact(self)) {
2141 /* we're returning another reference to self */
2142 new = (PyObject*)self;
2143 Py_INCREF(new);
2144 }
2145 else {
2146 new = PyString_FromStringAndSize(str, len);
2147 if (new == NULL)
2148 return NULL;
2149 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150 }
2151 else {
2152 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002153 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154 }
2155 return new;
2156}
2157
2158
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002159PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002160"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002162Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002164comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165
2166static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002167string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172 int plen;
2173 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002174 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176
Guido van Rossumc6821402000-05-08 14:08:05 +00002177 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2178 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179 return NULL;
2180 if (PyString_Check(subobj)) {
2181 prefix = PyString_AS_STRING(subobj);
2182 plen = PyString_GET_SIZE(subobj);
2183 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002184#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002185 else if (PyUnicode_Check(subobj)) {
2186 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002187 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002188 subobj, start, end, -1);
2189 if (rc == -1)
2190 return NULL;
2191 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002192 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002193 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002194#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002196 return NULL;
2197
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002198 string_adjust_indices(&start, &end, len);
2199
2200 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002201 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002203 if (end-start >= plen)
2204 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2205 else
2206 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207}
2208
2209
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002210PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002211"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002213Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002215comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002216
2217static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002218string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002220 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222 const char* suffix;
2223 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002225 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227
Guido van Rossumc6821402000-05-08 14:08:05 +00002228 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2229 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002230 return NULL;
2231 if (PyString_Check(subobj)) {
2232 suffix = PyString_AS_STRING(subobj);
2233 slen = PyString_GET_SIZE(subobj);
2234 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002235#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002236 else if (PyUnicode_Check(subobj)) {
2237 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002238 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002239 subobj, start, end, +1);
2240 if (rc == -1)
2241 return NULL;
2242 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002243 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002244 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002245#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 return NULL;
2248
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002249 string_adjust_indices(&start, &end, len);
2250
2251 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002252 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002254 if (end-slen > start)
2255 start = end - slen;
2256 if (end-start >= slen)
2257 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2258 else
2259 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260}
2261
2262
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002263PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002264"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002265\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002266Encodes S using the codec registered for encoding. encoding defaults\n\
2267to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002268handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002269a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002270
2271static PyObject *
2272string_encode(PyStringObject *self, PyObject *args)
2273{
2274 char *encoding = NULL;
2275 char *errors = NULL;
2276 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2277 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002278 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2279}
2280
2281
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002282PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002283"S.decode([encoding[,errors]]) -> object\n\
2284\n\
2285Decodes S using the codec registered for encoding. encoding defaults\n\
2286to the default encoding. errors may be given to set a different error\n\
2287handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002288a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002289
2290static PyObject *
2291string_decode(PyStringObject *self, PyObject *args)
2292{
2293 char *encoding = NULL;
2294 char *errors = NULL;
2295 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2296 return NULL;
2297 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002298}
2299
2300
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002301PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302"S.expandtabs([tabsize]) -> string\n\
2303\n\
2304Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002305If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306
2307static PyObject*
2308string_expandtabs(PyStringObject *self, PyObject *args)
2309{
2310 const char *e, *p;
2311 char *q;
2312 int i, j;
2313 PyObject *u;
2314 int tabsize = 8;
2315
2316 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2317 return NULL;
2318
Thomas Wouters7e474022000-07-16 12:04:32 +00002319 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002320 i = j = 0;
2321 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2322 for (p = PyString_AS_STRING(self); p < e; p++)
2323 if (*p == '\t') {
2324 if (tabsize > 0)
2325 j += tabsize - (j % tabsize);
2326 }
2327 else {
2328 j++;
2329 if (*p == '\n' || *p == '\r') {
2330 i += j;
2331 j = 0;
2332 }
2333 }
2334
2335 /* Second pass: create output string and fill it */
2336 u = PyString_FromStringAndSize(NULL, i + j);
2337 if (!u)
2338 return NULL;
2339
2340 j = 0;
2341 q = PyString_AS_STRING(u);
2342
2343 for (p = PyString_AS_STRING(self); p < e; p++)
2344 if (*p == '\t') {
2345 if (tabsize > 0) {
2346 i = tabsize - (j % tabsize);
2347 j += i;
2348 while (i--)
2349 *q++ = ' ';
2350 }
2351 }
2352 else {
2353 j++;
2354 *q++ = *p;
2355 if (*p == '\n' || *p == '\r')
2356 j = 0;
2357 }
2358
2359 return u;
2360}
2361
Tim Peters8fa5dd02001-09-12 02:18:30 +00002362static PyObject *
2363pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002364{
2365 PyObject *u;
2366
2367 if (left < 0)
2368 left = 0;
2369 if (right < 0)
2370 right = 0;
2371
Tim Peters8fa5dd02001-09-12 02:18:30 +00002372 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 Py_INCREF(self);
2374 return (PyObject *)self;
2375 }
2376
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002377 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 left + PyString_GET_SIZE(self) + right);
2379 if (u) {
2380 if (left)
2381 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002382 memcpy(PyString_AS_STRING(u) + left,
2383 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384 PyString_GET_SIZE(self));
2385 if (right)
2386 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2387 fill, right);
2388 }
2389
2390 return u;
2391}
2392
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002393PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002394"S.ljust(width) -> string\n"
2395"\n"
2396"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002397"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398
2399static PyObject *
2400string_ljust(PyStringObject *self, PyObject *args)
2401{
2402 int width;
2403 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2404 return NULL;
2405
Tim Peters8fa5dd02001-09-12 02:18:30 +00002406 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002407 Py_INCREF(self);
2408 return (PyObject*) self;
2409 }
2410
2411 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2412}
2413
2414
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002415PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002416"S.rjust(width) -> string\n"
2417"\n"
2418"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002419"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002420
2421static PyObject *
2422string_rjust(PyStringObject *self, PyObject *args)
2423{
2424 int width;
2425 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2426 return NULL;
2427
Tim Peters8fa5dd02001-09-12 02:18:30 +00002428 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429 Py_INCREF(self);
2430 return (PyObject*) self;
2431 }
2432
2433 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2434}
2435
2436
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002437PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002438"S.center(width) -> string\n"
2439"\n"
2440"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002441"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442
2443static PyObject *
2444string_center(PyStringObject *self, PyObject *args)
2445{
2446 int marg, left;
2447 int width;
2448
2449 if (!PyArg_ParseTuple(args, "i:center", &width))
2450 return NULL;
2451
Tim Peters8fa5dd02001-09-12 02:18:30 +00002452 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002453 Py_INCREF(self);
2454 return (PyObject*) self;
2455 }
2456
2457 marg = width - PyString_GET_SIZE(self);
2458 left = marg / 2 + (marg & width & 1);
2459
2460 return pad(self, left, marg - left, ' ');
2461}
2462
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002463PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002464"S.zfill(width) -> string\n"
2465"\n"
2466"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002467"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002468
2469static PyObject *
2470string_zfill(PyStringObject *self, PyObject *args)
2471{
2472 int fill;
2473 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002474 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002475
2476 int width;
2477 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2478 return NULL;
2479
2480 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002481 if (PyString_CheckExact(self)) {
2482 Py_INCREF(self);
2483 return (PyObject*) self;
2484 }
2485 else
2486 return PyString_FromStringAndSize(
2487 PyString_AS_STRING(self),
2488 PyString_GET_SIZE(self)
2489 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002490 }
2491
2492 fill = width - PyString_GET_SIZE(self);
2493
2494 s = pad(self, fill, 0, '0');
2495
2496 if (s == NULL)
2497 return NULL;
2498
2499 p = PyString_AS_STRING(s);
2500 if (p[fill] == '+' || p[fill] == '-') {
2501 /* move sign to beginning of string */
2502 p[0] = p[fill];
2503 p[fill] = '0';
2504 }
2505
2506 return (PyObject*) s;
2507}
2508
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002509PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002510"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002511"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002512"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002513"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002514
2515static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002516string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517{
Fred Drakeba096332000-07-09 07:04:36 +00002518 register const unsigned char *p
2519 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002520 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002521
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522 /* Shortcut for single character strings */
2523 if (PyString_GET_SIZE(self) == 1 &&
2524 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002525 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002527 /* Special case for empty strings */
2528 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002529 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002530
Guido van Rossum4c08d552000-03-10 22:55:18 +00002531 e = p + PyString_GET_SIZE(self);
2532 for (; p < e; p++) {
2533 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002534 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002535 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002536 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002537}
2538
2539
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002540PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002541"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002542\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002543Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002544and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002545
2546static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002547string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002548{
Fred Drakeba096332000-07-09 07:04:36 +00002549 register const unsigned char *p
2550 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002551 register const unsigned char *e;
2552
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002553 /* Shortcut for single character strings */
2554 if (PyString_GET_SIZE(self) == 1 &&
2555 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002556 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002557
2558 /* Special case for empty strings */
2559 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002560 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002561
2562 e = p + PyString_GET_SIZE(self);
2563 for (; p < e; p++) {
2564 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002565 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002566 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002567 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002568}
2569
2570
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002571PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002572"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002573\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002574Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002575and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002576
2577static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002578string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002579{
Fred Drakeba096332000-07-09 07:04:36 +00002580 register const unsigned char *p
2581 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002582 register const unsigned char *e;
2583
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002584 /* Shortcut for single character strings */
2585 if (PyString_GET_SIZE(self) == 1 &&
2586 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002587 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002588
2589 /* Special case for empty strings */
2590 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002591 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002592
2593 e = p + PyString_GET_SIZE(self);
2594 for (; p < e; p++) {
2595 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002596 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002597 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002598 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002599}
2600
2601
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002602PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002603"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002605Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002606False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002607
2608static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002609string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002610{
Fred Drakeba096332000-07-09 07:04:36 +00002611 register const unsigned char *p
2612 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002613 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614
Guido van Rossum4c08d552000-03-10 22:55:18 +00002615 /* Shortcut for single character strings */
2616 if (PyString_GET_SIZE(self) == 1 &&
2617 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002618 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002619
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002620 /* Special case for empty strings */
2621 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002622 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002623
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 e = p + PyString_GET_SIZE(self);
2625 for (; p < e; p++) {
2626 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002627 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002629 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002630}
2631
2632
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002633PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002634"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002636Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002637at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002638
2639static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002640string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641{
Fred Drakeba096332000-07-09 07:04:36 +00002642 register const unsigned char *p
2643 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002644 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002645 int cased;
2646
Guido van Rossum4c08d552000-03-10 22:55:18 +00002647 /* Shortcut for single character strings */
2648 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002649 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002650
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002651 /* Special case for empty strings */
2652 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002653 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002654
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655 e = p + PyString_GET_SIZE(self);
2656 cased = 0;
2657 for (; p < e; p++) {
2658 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002659 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002660 else if (!cased && islower(*p))
2661 cased = 1;
2662 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002663 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002664}
2665
2666
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002667PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002668"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002670Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002671at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002672
2673static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002674string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002675{
Fred Drakeba096332000-07-09 07:04:36 +00002676 register const unsigned char *p
2677 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002678 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002679 int cased;
2680
Guido van Rossum4c08d552000-03-10 22:55:18 +00002681 /* Shortcut for single character strings */
2682 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002683 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002684
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002685 /* Special case for empty strings */
2686 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002687 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002688
Guido van Rossum4c08d552000-03-10 22:55:18 +00002689 e = p + PyString_GET_SIZE(self);
2690 cased = 0;
2691 for (; p < e; p++) {
2692 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002693 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002694 else if (!cased && isupper(*p))
2695 cased = 1;
2696 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002697 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002698}
2699
2700
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002701PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002702"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002703\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002704Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002705may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002706ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002707
2708static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002709string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002710{
Fred Drakeba096332000-07-09 07:04:36 +00002711 register const unsigned char *p
2712 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002713 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002714 int cased, previous_is_cased;
2715
Guido van Rossum4c08d552000-03-10 22:55:18 +00002716 /* Shortcut for single character strings */
2717 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002718 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002719
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002720 /* Special case for empty strings */
2721 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002722 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002723
Guido van Rossum4c08d552000-03-10 22:55:18 +00002724 e = p + PyString_GET_SIZE(self);
2725 cased = 0;
2726 previous_is_cased = 0;
2727 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002728 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002729
2730 if (isupper(ch)) {
2731 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002732 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002733 previous_is_cased = 1;
2734 cased = 1;
2735 }
2736 else if (islower(ch)) {
2737 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002738 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002739 previous_is_cased = 1;
2740 cased = 1;
2741 }
2742 else
2743 previous_is_cased = 0;
2744 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002745 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002746}
2747
2748
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002749PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002750"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002751\n\
2752Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002753Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002754is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002755
2756#define SPLIT_APPEND(data, left, right) \
2757 str = PyString_FromStringAndSize(data + left, right - left); \
2758 if (!str) \
2759 goto onError; \
2760 if (PyList_Append(list, str)) { \
2761 Py_DECREF(str); \
2762 goto onError; \
2763 } \
2764 else \
2765 Py_DECREF(str);
2766
2767static PyObject*
2768string_splitlines(PyStringObject *self, PyObject *args)
2769{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002770 register int i;
2771 register int j;
2772 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002773 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002774 PyObject *list;
2775 PyObject *str;
2776 char *data;
2777
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002778 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002779 return NULL;
2780
2781 data = PyString_AS_STRING(self);
2782 len = PyString_GET_SIZE(self);
2783
Guido van Rossum4c08d552000-03-10 22:55:18 +00002784 list = PyList_New(0);
2785 if (!list)
2786 goto onError;
2787
2788 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002789 int eol;
2790
Guido van Rossum4c08d552000-03-10 22:55:18 +00002791 /* Find a line and append it */
2792 while (i < len && data[i] != '\n' && data[i] != '\r')
2793 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002794
2795 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002796 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002797 if (i < len) {
2798 if (data[i] == '\r' && i + 1 < len &&
2799 data[i+1] == '\n')
2800 i += 2;
2801 else
2802 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002803 if (keepends)
2804 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002805 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002806 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002807 j = i;
2808 }
2809 if (j < len) {
2810 SPLIT_APPEND(data, j, len);
2811 }
2812
2813 return list;
2814
2815 onError:
2816 Py_DECREF(list);
2817 return NULL;
2818}
2819
2820#undef SPLIT_APPEND
2821
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002822
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002823static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002824string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002825 /* Counterparts of the obsolete stropmodule functions; except
2826 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002827 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2828 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2829 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2830 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002831 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2832 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2833 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2834 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2835 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2836 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2837 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002838 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
2839 capitalize__doc__},
2840 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2841 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2842 endswith__doc__},
2843 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2844 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2845 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2846 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2847 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2848 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2849 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2850 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2851 startswith__doc__},
2852 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2853 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
2854 swapcase__doc__},
2855 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2856 translate__doc__},
2857 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2858 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2859 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2860 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2861 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
2862 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2863 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2864 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
2865 expandtabs__doc__},
2866 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
2867 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002868 {NULL, NULL} /* sentinel */
2869};
2870
Jeremy Hylton938ace62002-07-17 16:30:39 +00002871static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002872str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2873
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002874static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002875string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002876{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002877 PyObject *x = NULL;
2878 static char *kwlist[] = {"object", 0};
2879
Guido van Rossumae960af2001-08-30 03:11:59 +00002880 if (type != &PyString_Type)
2881 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002882 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2883 return NULL;
2884 if (x == NULL)
2885 return PyString_FromString("");
2886 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002887}
2888
Guido van Rossumae960af2001-08-30 03:11:59 +00002889static PyObject *
2890str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2891{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002892 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002893 int n;
2894
2895 assert(PyType_IsSubtype(type, &PyString_Type));
2896 tmp = string_new(&PyString_Type, args, kwds);
2897 if (tmp == NULL)
2898 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002899 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002900 n = PyString_GET_SIZE(tmp);
2901 pnew = type->tp_alloc(type, n);
2902 if (pnew != NULL) {
2903 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002904 ((PyStringObject *)pnew)->ob_shash =
2905 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002906 ((PyStringObject *)pnew)->ob_sinterned =
2907 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002908 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002909 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002910 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002911}
2912
Guido van Rossumcacfc072002-05-24 19:01:59 +00002913static PyObject *
2914basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2915{
2916 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002917 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00002918 return NULL;
2919}
2920
/* Docstring exposed as basestring.__doc__. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");

/* Type object for `basestring`.
 *
 * It derives directly from `object` (tp_base) and may itself be subclassed
 * (Py_TPFLAGS_BASETYPE).  Almost every slot is left empty; the only
 * behaviour defined here is tp_new, which always fails (see basestring_new),
 * so the type cannot be instantiated.
 */
PyTypeObject PyBaseString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                          /* ob_size */
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
2966
/* Docstring exposed as str.__doc__. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for the built-in `str` type.
 *
 * Instances are variable-sized: the fixed part is a PyStringObject and each
 * item is one char.  The type derives from `basestring` (tp_base) and may be
 * subclassed (Py_TPFLAGS_BASETYPE).  Sequence, mapping and buffer behaviour
 * come from the string_as_* tables, methods from string_methods, and
 * instances are released with PyObject_Del (tp_free).
 */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                          /* ob_size */
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    (destructor)string_dealloc,                 /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)string_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    (reprfunc)string_str,                       /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3015
3016void
Fred Drakeba096332000-07-09 07:04:36 +00003017PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003018{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003019 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003020 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003021 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003022 if (w == NULL || !PyString_Check(*pv)) {
3023 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003024 *pv = NULL;
3025 return;
3026 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003027 v = string_concat((PyStringObject *) *pv, w);
3028 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003029 *pv = v;
3030}
3031
Guido van Rossum013142a1994-08-30 08:19:36 +00003032void
Fred Drakeba096332000-07-09 07:04:36 +00003033PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003034{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003035 PyString_Concat(pv, w);
3036 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003037}
3038
3039
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003040/* The following function breaks the notion that strings are immutable:
3041 it changes the size of a string. We get away with this only if there
3042 is only one module referencing the object. You can also think of it
3043 as creating a new string object and destroying the old one, only
3044 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003045 already be known to some other part of the code...
3046 Note that if there's not enough memory to resize the string, the original
3047 string object at *pv is deallocated, *pv is set to NULL, an "out of
3048 memory" exception is set, and -1 is returned. Else (on success) 0 is
3049 returned, and the value in *pv may or may not be the same as on input.
3050 As always, an extra byte is allocated for a trailing \0 byte (newsize
3051 does *not* include that), and a trailing \0 byte is stored.
3052*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003053
3054int
Fred Drakeba096332000-07-09 07:04:36 +00003055_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003056{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003057 register PyObject *v;
3058 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003059 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003060 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003061 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003062 Py_DECREF(v);
3063 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003064 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003065 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003066 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003067 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003068 _Py_ForgetReference(v);
3069 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003070 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003071 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003072 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003073 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003074 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003075 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003076 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003077 _Py_NewReference(*pv);
3078 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003079 sv->ob_size = newsize;
3080 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003081 return 0;
3082}
Guido van Rossume5372401993-03-16 12:15:04 +00003083
3084/* Helpers for formatstring */
3085
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003086static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003087getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003088{
3089 int argidx = *p_argidx;
3090 if (argidx < arglen) {
3091 (*p_argidx)++;
3092 if (arglen < 0)
3093 return args;
3094 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003095 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003097 PyErr_SetString(PyExc_TypeError,
3098 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003099 return NULL;
3100}
3101
/* Format codes
 *
 * Bit flags accumulated in the `flags` variable while the flag characters
 * that follow '%' in a format specifier are parsed.
 *
 * F_LJUST	'-'	suppress padding on the left of the converted value
 * F_SIGN	'+'	emit '+' for values that carry no sign of their own
 * F_BLANK	' '	emit ' ' for such values when F_SIGN is not set
 * F_ALT	'#'	alternate form (e.g. keep the 0x/0X base marker)
 * F_ZERO	'0'	pad numeric conversions with '0' instead of ' '
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
3114
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003115static int
Fred Drakeba096332000-07-09 07:04:36 +00003116formatfloat(char *buf, size_t buflen, int flags,
3117 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003118{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003119 /* fmt = '%#.' + `prec` + `type`
3120 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003121 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003122 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003123 x = PyFloat_AsDouble(v);
3124 if (x == -1.0 && PyErr_Occurred()) {
3125 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003126 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003127 }
Guido van Rossume5372401993-03-16 12:15:04 +00003128 if (prec < 0)
3129 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003130 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3131 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003132 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3133 (flags&F_ALT) ? "#" : "",
3134 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003135 /* worst case length calc to ensure no buffer overrun:
3136 fmt = %#.<prec>g
3137 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003138 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003139 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3140 If prec=0 the effective precision is 1 (the leading digit is
3141 always given), therefore increase by one to 10+prec. */
3142 if (buflen <= (size_t)10 + (size_t)prec) {
3143 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003144 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003145 return -1;
3146 }
Tim Peters885d4572001-11-28 20:27:42 +00003147 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003148 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003149}
3150
Tim Peters38fd5b62000-09-21 05:43:11 +00003151/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3152 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3153 * Python's regular ints.
3154 * Return value: a new PyString*, or NULL if error.
3155 * . *pbuf is set to point into it,
3156 * *plen set to the # of chars following that.
3157 * Caller must decref it when done using pbuf.
3158 * The string starting at *pbuf is of the form
3159 * "-"? ("0x" | "0X")? digit+
3160 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003161 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003162 * There will be at least prec digits, zero-filled on the left if
3163 * necessary to get that many.
3164 * val object to be converted
3165 * flags bitmask of format flags; only F_ALT is looked at
3166 * prec minimum number of digits; 0-fill on left if needed
3167 * type a character in [duoxX]; u acts the same as d
3168 *
3169 * CAUTION: o, x and X conversions on regular ints can never
3170 * produce a '-' sign, but can for Python's unbounded ints.
3171 */
3172PyObject*
3173_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3174 char **pbuf, int *plen)
3175{
3176 PyObject *result = NULL;
3177 char *buf;
3178 int i;
3179 int sign; /* 1 if '-', else 0 */
3180 int len; /* number of characters */
3181 int numdigits; /* len == numnondigits + numdigits */
3182 int numnondigits = 0;
3183
3184 switch (type) {
3185 case 'd':
3186 case 'u':
3187 result = val->ob_type->tp_str(val);
3188 break;
3189 case 'o':
3190 result = val->ob_type->tp_as_number->nb_oct(val);
3191 break;
3192 case 'x':
3193 case 'X':
3194 numnondigits = 2;
3195 result = val->ob_type->tp_as_number->nb_hex(val);
3196 break;
3197 default:
3198 assert(!"'type' not in [duoxX]");
3199 }
3200 if (!result)
3201 return NULL;
3202
3203 /* To modify the string in-place, there can only be one reference. */
3204 if (result->ob_refcnt != 1) {
3205 PyErr_BadInternalCall();
3206 return NULL;
3207 }
3208 buf = PyString_AsString(result);
3209 len = PyString_Size(result);
3210 if (buf[len-1] == 'L') {
3211 --len;
3212 buf[len] = '\0';
3213 }
3214 sign = buf[0] == '-';
3215 numnondigits += sign;
3216 numdigits = len - numnondigits;
3217 assert(numdigits > 0);
3218
Tim Petersfff53252001-04-12 18:38:48 +00003219 /* Get rid of base marker unless F_ALT */
3220 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003221 /* Need to skip 0x, 0X or 0. */
3222 int skipped = 0;
3223 switch (type) {
3224 case 'o':
3225 assert(buf[sign] == '0');
3226 /* If 0 is only digit, leave it alone. */
3227 if (numdigits > 1) {
3228 skipped = 1;
3229 --numdigits;
3230 }
3231 break;
3232 case 'x':
3233 case 'X':
3234 assert(buf[sign] == '0');
3235 assert(buf[sign + 1] == 'x');
3236 skipped = 2;
3237 numnondigits -= 2;
3238 break;
3239 }
3240 if (skipped) {
3241 buf += skipped;
3242 len -= skipped;
3243 if (sign)
3244 buf[0] = '-';
3245 }
3246 assert(len == numnondigits + numdigits);
3247 assert(numdigits > 0);
3248 }
3249
3250 /* Fill with leading zeroes to meet minimum width. */
3251 if (prec > numdigits) {
3252 PyObject *r1 = PyString_FromStringAndSize(NULL,
3253 numnondigits + prec);
3254 char *b1;
3255 if (!r1) {
3256 Py_DECREF(result);
3257 return NULL;
3258 }
3259 b1 = PyString_AS_STRING(r1);
3260 for (i = 0; i < numnondigits; ++i)
3261 *b1++ = *buf++;
3262 for (i = 0; i < prec - numdigits; i++)
3263 *b1++ = '0';
3264 for (i = 0; i < numdigits; i++)
3265 *b1++ = *buf++;
3266 *b1 = '\0';
3267 Py_DECREF(result);
3268 result = r1;
3269 buf = PyString_AS_STRING(result);
3270 len = numnondigits + prec;
3271 }
3272
3273 /* Fix up case for hex conversions. */
3274 switch (type) {
3275 case 'x':
3276 /* Need to convert all upper case letters to lower case. */
3277 for (i = 0; i < len; i++)
3278 if (buf[i] >= 'A' && buf[i] <= 'F')
3279 buf[i] += 'a'-'A';
3280 break;
3281 case 'X':
3282 /* Need to convert 0x to 0X (and -0x to -0X). */
3283 if (buf[sign + 1] == 'x')
3284 buf[sign + 1] = 'X';
3285 break;
3286 }
3287 *pbuf = buf;
3288 *plen = len;
3289 return result;
3290}
3291
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003292static int
Fred Drakeba096332000-07-09 07:04:36 +00003293formatint(char *buf, size_t buflen, int flags,
3294 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003295{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003296 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003297 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3298 + 1 + 1 = 24 */
3299 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003300 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003301
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003302 x = PyInt_AsLong(v);
3303 if (x == -1 && PyErr_Occurred()) {
3304 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003305 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003306 }
Guido van Rossume5372401993-03-16 12:15:04 +00003307 if (prec < 0)
3308 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003309
3310 if ((flags & F_ALT) &&
3311 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003312 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003313 * of issues that cause pain:
3314 * - when 0 is being converted, the C standard leaves off
3315 * the '0x' or '0X', which is inconsistent with other
3316 * %#x/%#X conversions and inconsistent with Python's
3317 * hex() function
3318 * - there are platforms that violate the standard and
3319 * convert 0 with the '0x' or '0X'
3320 * (Metrowerks, Compaq Tru64)
3321 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003322 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003323 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003324 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003325 * We can achieve the desired consistency by inserting our
3326 * own '0x' or '0X' prefix, and substituting %x/%X in place
3327 * of %#x/%#X.
3328 *
3329 * Note that this is the same approach as used in
3330 * formatint() in unicodeobject.c
3331 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003332 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003333 type, prec, type);
3334 }
3335 else {
3336 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003337 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003338 prec, type);
3339 }
3340
Tim Peters38fd5b62000-09-21 05:43:11 +00003341 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003342 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3343 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003344 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003345 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003346 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003347 return -1;
3348 }
Tim Peters885d4572001-11-28 20:27:42 +00003349 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003350 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003351}
3352
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003353static int
Fred Drakeba096332000-07-09 07:04:36 +00003354formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003355{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003356 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003357 if (PyString_Check(v)) {
3358 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003359 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003360 }
3361 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003362 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003363 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003364 }
3365 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003366 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003367}
3368
Guido van Rossum013142a1994-08-30 08:19:36 +00003369
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003370/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3371
3372 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3373 chars are formatted. XXX This is a magic number. Each formatting
3374 routine does bounds checking to ensure no overflow, but a better
3375 solution may be to malloc a buffer of appropriate size for each
3376 format. For now, the current solution is sufficient.
3377*/
3378#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003379
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003380PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003381PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003382{
3383 char *fmt, *res;
3384 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003385 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003386 PyObject *result, *orig_args;
3387#ifdef Py_USING_UNICODE
3388 PyObject *v, *w;
3389#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003390 PyObject *dict = NULL;
3391 if (format == NULL || !PyString_Check(format) || args == NULL) {
3392 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003393 return NULL;
3394 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003395 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003396 fmt = PyString_AS_STRING(format);
3397 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003398 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003399 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003400 if (result == NULL)
3401 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003402 res = PyString_AsString(result);
3403 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003404 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003405 argidx = 0;
3406 }
3407 else {
3408 arglen = -1;
3409 argidx = -2;
3410 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003411 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003412 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003413 while (--fmtcnt >= 0) {
3414 if (*fmt != '%') {
3415 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003416 rescnt = fmtcnt + 100;
3417 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003418 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003419 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003420 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003421 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003422 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003423 }
3424 *res++ = *fmt++;
3425 }
3426 else {
3427 /* Got a format specifier */
3428 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003429 int width = -1;
3430 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003431 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003432 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003433 PyObject *v = NULL;
3434 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003435 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003436 int sign;
3437 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003438 char formatbuf[FORMATBUFLEN];
3439 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003440#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003441 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003442 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003443#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003444
Guido van Rossumda9c2711996-12-05 21:58:58 +00003445 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003446 if (*fmt == '(') {
3447 char *keystart;
3448 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003449 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003450 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003451
3452 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003453 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003454 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003455 goto error;
3456 }
3457 ++fmt;
3458 --fmtcnt;
3459 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003460 /* Skip over balanced parentheses */
3461 while (pcount > 0 && --fmtcnt >= 0) {
3462 if (*fmt == ')')
3463 --pcount;
3464 else if (*fmt == '(')
3465 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003466 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003467 }
3468 keylen = fmt - keystart - 1;
3469 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003470 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003471 "incomplete format key");
3472 goto error;
3473 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003474 key = PyString_FromStringAndSize(keystart,
3475 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003476 if (key == NULL)
3477 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003478 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003479 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003480 args_owned = 0;
3481 }
3482 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003483 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003484 if (args == NULL) {
3485 goto error;
3486 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003487 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003488 arglen = -1;
3489 argidx = -2;
3490 }
Guido van Rossume5372401993-03-16 12:15:04 +00003491 while (--fmtcnt >= 0) {
3492 switch (c = *fmt++) {
3493 case '-': flags |= F_LJUST; continue;
3494 case '+': flags |= F_SIGN; continue;
3495 case ' ': flags |= F_BLANK; continue;
3496 case '#': flags |= F_ALT; continue;
3497 case '0': flags |= F_ZERO; continue;
3498 }
3499 break;
3500 }
3501 if (c == '*') {
3502 v = getnextarg(args, arglen, &argidx);
3503 if (v == NULL)
3504 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003505 if (!PyInt_Check(v)) {
3506 PyErr_SetString(PyExc_TypeError,
3507 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003508 goto error;
3509 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003510 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003511 if (width < 0) {
3512 flags |= F_LJUST;
3513 width = -width;
3514 }
Guido van Rossume5372401993-03-16 12:15:04 +00003515 if (--fmtcnt >= 0)
3516 c = *fmt++;
3517 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003518 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003519 width = c - '0';
3520 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003521 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003522 if (!isdigit(c))
3523 break;
3524 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003525 PyErr_SetString(
3526 PyExc_ValueError,
3527 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003528 goto error;
3529 }
3530 width = width*10 + (c - '0');
3531 }
3532 }
3533 if (c == '.') {
3534 prec = 0;
3535 if (--fmtcnt >= 0)
3536 c = *fmt++;
3537 if (c == '*') {
3538 v = getnextarg(args, arglen, &argidx);
3539 if (v == NULL)
3540 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003541 if (!PyInt_Check(v)) {
3542 PyErr_SetString(
3543 PyExc_TypeError,
3544 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003545 goto error;
3546 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003547 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003548 if (prec < 0)
3549 prec = 0;
3550 if (--fmtcnt >= 0)
3551 c = *fmt++;
3552 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003553 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003554 prec = c - '0';
3555 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003556 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003557 if (!isdigit(c))
3558 break;
3559 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003560 PyErr_SetString(
3561 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003562 "prec too big");
3563 goto error;
3564 }
3565 prec = prec*10 + (c - '0');
3566 }
3567 }
3568 } /* prec */
3569 if (fmtcnt >= 0) {
3570 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003571 if (--fmtcnt >= 0)
3572 c = *fmt++;
3573 }
3574 }
3575 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003576 PyErr_SetString(PyExc_ValueError,
3577 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003578 goto error;
3579 }
3580 if (c != '%') {
3581 v = getnextarg(args, arglen, &argidx);
3582 if (v == NULL)
3583 goto error;
3584 }
3585 sign = 0;
3586 fill = ' ';
3587 switch (c) {
3588 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003589 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003590 len = 1;
3591 break;
3592 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003593 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003594#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003595 if (PyUnicode_Check(v)) {
3596 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003597 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003598 goto unicode;
3599 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003600#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003601 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003602 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003603 else
3604 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003605 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003606 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003607 if (!PyString_Check(temp)) {
3608 PyErr_SetString(PyExc_TypeError,
3609 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003610 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003611 goto error;
3612 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003613 pbuf = PyString_AS_STRING(temp);
3614 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003615 if (prec >= 0 && len > prec)
3616 len = prec;
3617 break;
3618 case 'i':
3619 case 'd':
3620 case 'u':
3621 case 'o':
3622 case 'x':
3623 case 'X':
3624 if (c == 'i')
3625 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003626 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003627 temp = _PyString_FormatLong(v, flags,
3628 prec, c, &pbuf, &len);
3629 if (!temp)
3630 goto error;
3631 /* unbounded ints can always produce
3632 a sign character! */
3633 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003634 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003635 else {
3636 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003637 len = formatint(pbuf,
3638 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003639 flags, prec, c, v);
3640 if (len < 0)
3641 goto error;
3642 /* only d conversion is signed */
3643 sign = c == 'd';
3644 }
3645 if (flags & F_ZERO)
3646 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003647 break;
3648 case 'e':
3649 case 'E':
3650 case 'f':
3651 case 'g':
3652 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003653 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003654 len = formatfloat(pbuf, sizeof(formatbuf),
3655 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003656 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003657 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003658 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003659 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003660 fill = '0';
3661 break;
3662 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003663 pbuf = formatbuf;
3664 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003665 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003666 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003667 break;
3668 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003669 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003670 "unsupported format character '%c' (0x%x) "
3671 "at index %i",
3672 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003673 goto error;
3674 }
3675 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003676 if (*pbuf == '-' || *pbuf == '+') {
3677 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003678 len--;
3679 }
3680 else if (flags & F_SIGN)
3681 sign = '+';
3682 else if (flags & F_BLANK)
3683 sign = ' ';
3684 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003685 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003686 }
3687 if (width < len)
3688 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003689 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003690 reslen -= rescnt;
3691 rescnt = width + fmtcnt + 100;
3692 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003693 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003694 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003695 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003696 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003697 }
3698 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003699 if (fill != ' ')
3700 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003701 rescnt--;
3702 if (width > len)
3703 width--;
3704 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003705 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3706 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003707 assert(pbuf[1] == c);
3708 if (fill != ' ') {
3709 *res++ = *pbuf++;
3710 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003711 }
Tim Petersfff53252001-04-12 18:38:48 +00003712 rescnt -= 2;
3713 width -= 2;
3714 if (width < 0)
3715 width = 0;
3716 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003717 }
3718 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003719 do {
3720 --rescnt;
3721 *res++ = fill;
3722 } while (--width > len);
3723 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003724 if (fill == ' ') {
3725 if (sign)
3726 *res++ = sign;
3727 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003728 (c == 'x' || c == 'X')) {
3729 assert(pbuf[0] == '0');
3730 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003731 *res++ = *pbuf++;
3732 *res++ = *pbuf++;
3733 }
3734 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003735 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003736 res += len;
3737 rescnt -= len;
3738 while (--width >= len) {
3739 --rescnt;
3740 *res++ = ' ';
3741 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003742 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003743 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003744 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003745 goto error;
3746 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003747 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003748 } /* '%' */
3749 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003750 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003751 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003752 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003753 goto error;
3754 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003755 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003756 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003757 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003758 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003759 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003760
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003761#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003762 unicode:
3763 if (args_owned) {
3764 Py_DECREF(args);
3765 args_owned = 0;
3766 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003767 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003768 if (PyTuple_Check(orig_args) && argidx > 0) {
3769 PyObject *v;
3770 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3771 v = PyTuple_New(n);
3772 if (v == NULL)
3773 goto error;
3774 while (--n >= 0) {
3775 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3776 Py_INCREF(w);
3777 PyTuple_SET_ITEM(v, n, w);
3778 }
3779 args = v;
3780 } else {
3781 Py_INCREF(orig_args);
3782 args = orig_args;
3783 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003784 args_owned = 1;
3785 /* Take what we have of the result and let the Unicode formatting
3786 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003787 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003788 if (_PyString_Resize(&result, rescnt))
3789 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003790 fmtcnt = PyString_GET_SIZE(format) - \
3791 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003792 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3793 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003794 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003795 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003796 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003797 if (v == NULL)
3798 goto error;
3799 /* Paste what we have (result) to what the Unicode formatting
3800 function returned (v) and return the result (or error) */
3801 w = PyUnicode_Concat(result, v);
3802 Py_DECREF(result);
3803 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003804 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003805 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003806#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003807
Guido van Rossume5372401993-03-16 12:15:04 +00003808 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003809 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003810 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003811 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003812 }
Guido van Rossume5372401993-03-16 12:15:04 +00003813 return NULL;
3814}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003815
3816
Guido van Rossum2a61e741997-01-18 07:55:05 +00003817
Barry Warsaw4df762f2000-08-16 23:41:01 +00003818/* This dictionary will leak at PyString_Fini() time. That's acceptable
3819 * because PyString_Fini() specifically frees interned strings that are
3820 * only referenced by this dictionary. The CVS log entry for revision 2.45
3821 * says:
3822 *
3823 * Change the Fini function to only remove otherwise unreferenced
3824 * strings from the interned table. There are references in
3825 * hard-to-find static variables all over the interpreter, and it's not
3826 * worth trying to get rid of all those; but "uninterning" isn't fair
3827 * either and may cause subtle failures later -- so we have to keep them
3828 * in the interned table.
3829 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003830static PyObject *interned;
3831
3832void
Fred Drakeba096332000-07-09 07:04:36 +00003833PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003834{
3835 register PyStringObject *s = (PyStringObject *)(*p);
3836 PyObject *t;
3837 if (s == NULL || !PyString_Check(s))
3838 Py_FatalError("PyString_InternInPlace: strings only please!");
3839 if ((t = s->ob_sinterned) != NULL) {
3840 if (t == (PyObject *)s)
3841 return;
3842 Py_INCREF(t);
3843 *p = t;
3844 Py_DECREF(s);
3845 return;
3846 }
3847 if (interned == NULL) {
3848 interned = PyDict_New();
3849 if (interned == NULL)
3850 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003851 }
3852 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3853 Py_INCREF(t);
3854 *p = s->ob_sinterned = t;
3855 Py_DECREF(s);
3856 return;
3857 }
Tim Peters111f6092001-09-12 07:54:51 +00003858 /* Ensure that only true string objects appear in the intern dict,
3859 and as the value of ob_sinterned. */
3860 if (PyString_CheckExact(s)) {
3861 t = (PyObject *)s;
3862 if (PyDict_SetItem(interned, t, t) == 0) {
3863 s->ob_sinterned = t;
3864 return;
3865 }
3866 }
3867 else {
3868 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3869 PyString_GET_SIZE(s));
3870 if (t != NULL) {
3871 if (PyDict_SetItem(interned, t, t) == 0) {
3872 *p = s->ob_sinterned = t;
3873 Py_DECREF(s);
3874 return;
3875 }
3876 Py_DECREF(t);
3877 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003878 }
3879 PyErr_Clear();
3880}
3881
3882
3883PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003884PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003885{
3886 PyObject *s = PyString_FromString(cp);
3887 if (s == NULL)
3888 return NULL;
3889 PyString_InternInPlace(&s);
3890 return s;
3891}
3892
Guido van Rossum8cf04761997-08-02 02:57:45 +00003893void
Fred Drakeba096332000-07-09 07:04:36 +00003894PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003895{
3896 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003897 for (i = 0; i < UCHAR_MAX + 1; i++) {
3898 Py_XDECREF(characters[i]);
3899 characters[i] = NULL;
3900 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003901 Py_XDECREF(nullstring);
3902 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003903 if (interned) {
3904 int pos, changed;
3905 PyObject *key, *value;
3906 do {
3907 changed = 0;
3908 pos = 0;
3909 while (PyDict_Next(interned, &pos, &key, &value)) {
3910 if (key->ob_refcnt == 2 && key == value) {
3911 PyDict_DelItem(interned, key);
3912 changed = 1;
3913 }
3914 }
3915 } while (changed);
3916 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003917}
Barry Warsawa903ad982001-02-23 16:40:48 +00003918
Barry Warsawa903ad982001-02-23 16:40:48 +00003919void _Py_ReleaseInternedStrings(void)
3920{
3921 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003922 fprintf(stderr, "releasing interned strings\n");
3923 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003924 Py_DECREF(interned);
3925 interned = NULL;
3926 }
3927}