blob: 5e40524a0aabbc26e5629e4b4dcd86ff907ed829 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
18/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000019 For both PyString_FromString() and PyString_FromStringAndSize(), the
20 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000021 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000022
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000023 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000024 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000025
   For PyString_FromStringAndSize(), the parameter `str' is
27 either NULL or else points to a string containing at least `size' bytes.
28 For PyString_FromStringAndSize(), the string in the `str' parameter does
29 not have to be null-terminated. (Therefore it is safe to construct a
30 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
31 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
32 bytes (setting the last byte to the null terminating character) and you can
33 fill in the data yourself. If `str' is non-NULL then the resulting
34 PyString object must be treated as immutable and you must not fill in nor
35 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 The PyObject member `op->ob_size', which denotes the number of "extra
38 items" in a variable-size object, will contain the number of bytes
39 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
41 PyString_FromStringAndSize()) or the length of the string in the `str'
42 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
159 count = vargs;
160#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000161 /* step 1: figure out how large a buffer we need */
162 for (f = format; *f; f++) {
163 if (*f == '%') {
164 const char* p = f;
165 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
166 ;
167
168 /* skip the 'l' in %ld, since it doesn't change the
169 width. although only %d is supported (see
170 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000171 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000172 if (*f == 'l' && *(f+1) == 'd')
173 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000174
Barry Warsawdadace02001-08-24 18:32:06 +0000175 switch (*f) {
176 case 'c':
177 (void)va_arg(count, int);
178 /* fall through... */
179 case '%':
180 n++;
181 break;
182 case 'd': case 'i': case 'x':
183 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000184 /* 20 bytes is enough to hold a 64-bit
185 integer. Decimal takes the most space.
186 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 n += 20;
188 break;
189 case 's':
190 s = va_arg(count, char*);
191 n += strlen(s);
192 break;
193 case 'p':
194 (void) va_arg(count, int);
195 /* maximum 64-bit pointer representation:
196 * 0xffffffffffffffff
197 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000198 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000199 */
200 n += 19;
201 break;
202 default:
203 /* if we stumble upon an unknown
204 formatting code, copy the rest of
205 the format string to the output
206 string. (we cannot just skip the
207 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000208 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000209 n += strlen(p);
210 goto expand;
211 }
212 } else
213 n++;
214 }
215 expand:
216 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000217 /* Since we've analyzed how much space we need for the worst case,
218 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 string = PyString_FromStringAndSize(NULL, n);
220 if (!string)
221 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000222
Barry Warsawdadace02001-08-24 18:32:06 +0000223 s = PyString_AsString(string);
224
225 for (f = format; *f; f++) {
226 if (*f == '%') {
227 const char* p = f++;
228 int i, longflag = 0;
229 /* parse the width.precision part (we're only
230 interested in the precision value, if any) */
231 n = 0;
232 while (isdigit(Py_CHARMASK(*f)))
233 n = (n*10) + *f++ - '0';
234 if (*f == '.') {
235 f++;
236 n = 0;
237 while (isdigit(Py_CHARMASK(*f)))
238 n = (n*10) + *f++ - '0';
239 }
240 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
241 f++;
242 /* handle the long flag, but only for %ld. others
243 can be added when necessary. */
244 if (*f == 'l' && *(f+1) == 'd') {
245 longflag = 1;
246 ++f;
247 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000248
Barry Warsawdadace02001-08-24 18:32:06 +0000249 switch (*f) {
250 case 'c':
251 *s++ = va_arg(vargs, int);
252 break;
253 case 'd':
254 if (longflag)
255 sprintf(s, "%ld", va_arg(vargs, long));
256 else
257 sprintf(s, "%d", va_arg(vargs, int));
258 s += strlen(s);
259 break;
260 case 'i':
261 sprintf(s, "%i", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'x':
265 sprintf(s, "%x", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 's':
269 p = va_arg(vargs, char*);
270 i = strlen(p);
271 if (n > 0 && i > n)
272 i = n;
273 memcpy(s, p, i);
274 s += i;
275 break;
276 case 'p':
277 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000278 /* %p is ill-defined: ensure leading 0x. */
279 if (s[1] == 'X')
280 s[1] = 'x';
281 else if (s[1] != 'x') {
282 memmove(s+2, s, strlen(s)+1);
283 s[0] = '0';
284 s[1] = 'x';
285 }
Barry Warsawdadace02001-08-24 18:32:06 +0000286 s += strlen(s);
287 break;
288 case '%':
289 *s++ = '%';
290 break;
291 default:
292 strcpy(s, p);
293 s += strlen(s);
294 goto end;
295 }
296 } else
297 *s++ = *f;
298 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000299
Barry Warsawdadace02001-08-24 18:32:06 +0000300 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000301 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000302 return string;
303}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000304
Barry Warsawdadace02001-08-24 18:32:06 +0000305PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000306PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000307{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000308 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000309 va_list vargs;
310
311#ifdef HAVE_STDARG_PROTOTYPES
312 va_start(vargs, format);
313#else
314 va_start(vargs);
315#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 ret = PyString_FromFormatV(format, vargs);
317 va_end(vargs);
318 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319}
320
321
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000322PyObject *PyString_Decode(const char *s,
323 int size,
324 const char *encoding,
325 const char *errors)
326{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000327 PyObject *v, *str;
328
329 str = PyString_FromStringAndSize(s, size);
330 if (str == NULL)
331 return NULL;
332 v = PyString_AsDecodedString(str, encoding, errors);
333 Py_DECREF(str);
334 return v;
335}
336
337PyObject *PyString_AsDecodedObject(PyObject *str,
338 const char *encoding,
339 const char *errors)
340{
341 PyObject *v;
342
343 if (!PyString_Check(str)) {
344 PyErr_BadArgument();
345 goto onError;
346 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000347
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000348 if (encoding == NULL) {
349#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000351#else
352 PyErr_SetString(PyExc_ValueError, "no encoding specified");
353 goto onError;
354#endif
355 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356
357 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000358 v = PyCodec_Decode(str, encoding, errors);
359 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361
362 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000363
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 return NULL;
366}
367
368PyObject *PyString_AsDecodedString(PyObject *str,
369 const char *encoding,
370 const char *errors)
371{
372 PyObject *v;
373
374 v = PyString_AsDecodedObject(str, encoding, errors);
375 if (v == NULL)
376 goto onError;
377
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000378#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000379 /* Convert Unicode to a string using the default encoding */
380 if (PyUnicode_Check(v)) {
381 PyObject *temp = v;
382 v = PyUnicode_AsEncodedString(v, NULL, NULL);
383 Py_DECREF(temp);
384 if (v == NULL)
385 goto onError;
386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 if (!PyString_Check(v)) {
389 PyErr_Format(PyExc_TypeError,
390 "decoder did not return a string object (type=%.400s)",
391 v->ob_type->tp_name);
392 Py_DECREF(v);
393 goto onError;
394 }
395
396 return v;
397
398 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 return NULL;
400}
401
402PyObject *PyString_Encode(const char *s,
403 int size,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 str = PyString_FromStringAndSize(s, size);
410 if (str == NULL)
411 return NULL;
412 v = PyString_AsEncodedString(str, encoding, errors);
413 Py_DECREF(str);
414 return v;
415}
416
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 const char *encoding,
419 const char *errors)
420{
421 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000422
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000423 if (!PyString_Check(str)) {
424 PyErr_BadArgument();
425 goto onError;
426 }
427
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000428 if (encoding == NULL) {
429#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000431#else
432 PyErr_SetString(PyExc_ValueError, "no encoding specified");
433 goto onError;
434#endif
435 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436
437 /* Encode via the codec registry */
438 v = PyCodec_Encode(str, encoding, errors);
439 if (v == NULL)
440 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000441
442 return v;
443
444 onError:
445 return NULL;
446}
447
448PyObject *PyString_AsEncodedString(PyObject *str,
449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v;
453
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000454 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000455 if (v == NULL)
456 goto onError;
457
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000458#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 /* Convert Unicode to a string using the default encoding */
460 if (PyUnicode_Check(v)) {
461 PyObject *temp = v;
462 v = PyUnicode_AsEncodedString(v, NULL, NULL);
463 Py_DECREF(temp);
464 if (v == NULL)
465 goto onError;
466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(v)) {
469 PyErr_Format(PyExc_TypeError,
470 "encoder did not return a string object (type=%.400s)",
471 v->ob_type->tp_name);
472 Py_DECREF(v);
473 goto onError;
474 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000476 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000477
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 onError:
479 return NULL;
480}
481
Guido van Rossum234f9421993-06-17 12:35:49 +0000482static void
Fred Drakeba096332000-07-09 07:04:36 +0000483string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000484{
Guido van Rossum9475a232001-10-05 20:51:39 +0000485 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000486}
487
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000488static int
489string_getsize(register PyObject *op)
490{
491 char *s;
492 int len;
493 if (PyString_AsStringAndSize(op, &s, &len))
494 return -1;
495 return len;
496}
497
498static /*const*/ char *
499string_getbuffer(register PyObject *op)
500{
501 char *s;
502 int len;
503 if (PyString_AsStringAndSize(op, &s, &len))
504 return NULL;
505 return s;
506}
507
Guido van Rossumd7047b31995-01-02 19:07:15 +0000508int
Fred Drakeba096332000-07-09 07:04:36 +0000509PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000511 if (!PyString_Check(op))
512 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
516/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000517PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (!PyString_Check(op))
520 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522}
523
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000524int
525PyString_AsStringAndSize(register PyObject *obj,
526 register char **s,
527 register int *len)
528{
529 if (s == NULL) {
530 PyErr_BadInternalCall();
531 return -1;
532 }
533
534 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000536 if (PyUnicode_Check(obj)) {
537 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
538 if (obj == NULL)
539 return -1;
540 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000541 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000542#endif
543 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000544 PyErr_Format(PyExc_TypeError,
545 "expected string or Unicode object, "
546 "%.200s found", obj->ob_type->tp_name);
547 return -1;
548 }
549 }
550
551 *s = PyString_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyString_GET_SIZE(obj);
554 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected string without null bytes");
557 return -1;
558 }
559 return 0;
560}
561
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562/* Methods */
563
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564static int
Fred Drakeba096332000-07-09 07:04:36 +0000565string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566{
567 int i;
568 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000570
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000571 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000572 if (! PyString_CheckExact(op)) {
573 int ret;
574 /* A str subclass may have its own __str__ method. */
575 op = (PyStringObject *) PyObject_Str((PyObject *)op);
576 if (op == NULL)
577 return -1;
578 ret = string_print(op, fp, flags);
579 Py_DECREF(op);
580 return ret;
581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000582 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000584 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000586
Thomas Wouters7e474022000-07-16 12:04:32 +0000587 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000589 if (memchr(op->ob_sval, '\'', op->ob_size) &&
590 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 quote = '"';
592
593 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594 for (i = 0; i < op->ob_size; i++) {
595 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000596 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000598 else if (c == '\t')
599 fprintf(fp, "\\t");
600 else if (c == '\n')
601 fprintf(fp, "\\n");
602 else if (c == '\r')
603 fprintf(fp, "\\r");
604 else if (c < ' ' || c >= 0x7f)
605 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000606 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000607 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000609 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000610 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000611}
612
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000613static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000614string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000616 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
617 PyObject *v;
618 if (newsize > INT_MAX) {
619 PyErr_SetString(PyExc_OverflowError,
620 "string is too large to make repr");
621 }
622 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000624 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000625 }
626 else {
627 register int i;
628 register char c;
629 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000630 int quote;
631
Thomas Wouters7e474022000-07-16 12:04:32 +0000632 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000633 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000634 if (memchr(op->ob_sval, '\'', op->ob_size) &&
635 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000636 quote = '"';
637
Tim Peters9161c8b2001-12-03 01:55:38 +0000638 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000639 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000641 /* There's at least enough room for a hex escape
642 and a closing quote. */
643 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000645 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000647 else if (c == '\t')
648 *p++ = '\\', *p++ = 't';
649 else if (c == '\n')
650 *p++ = '\\', *p++ = 'n';
651 else if (c == '\r')
652 *p++ = '\\', *p++ = 'r';
653 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000654 /* For performance, we don't want to call
655 PyOS_snprintf here (extra layers of
656 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000657 sprintf(p, "\\x%02x", c & 0xff);
658 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000659 }
660 else
661 *p++ = c;
662 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000663 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000664 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000666 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000667 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000668 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670}
671
Guido van Rossum189f1df2001-05-01 16:51:53 +0000672static PyObject *
673string_str(PyObject *s)
674{
Tim Petersc9933152001-10-16 20:18:24 +0000675 assert(PyString_Check(s));
676 if (PyString_CheckExact(s)) {
677 Py_INCREF(s);
678 return s;
679 }
680 else {
681 /* Subtype -- return genuine string with the same value. */
682 PyStringObject *t = (PyStringObject *) s;
683 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
684 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000685}
686
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687static int
Fred Drakeba096332000-07-09 07:04:36 +0000688string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
690 return a->ob_size;
691}
692
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000694string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
696 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697 register PyStringObject *op;
698 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000699#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (PyUnicode_Check(bb))
701 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000702#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000703 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000704 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000705 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706 return NULL;
707 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000708#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000710 if ((a->ob_size == 0 || b->ob_size == 0) &&
711 PyString_CheckExact(a) && PyString_CheckExact(b)) {
712 if (a->ob_size == 0) {
713 Py_INCREF(bb);
714 return bb;
715 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716 Py_INCREF(a);
717 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718 }
719 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000720 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000722 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000723 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000724 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000725 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000726 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000727 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000728 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
729 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
730 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000731 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732#undef b
733}
734
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000736string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737{
738 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000739 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000740 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000741 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742 if (n < 0)
743 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000744 /* watch out for overflows: the size can overflow int,
745 * and the # of bytes needed can overflow size_t
746 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000748 if (n && size / n != a->ob_size) {
749 PyErr_SetString(PyExc_OverflowError,
750 "repeated string is too long");
751 return NULL;
752 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000753 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000754 Py_INCREF(a);
755 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756 }
Tim Peters8f422462000-09-09 06:13:41 +0000757 nbytes = size * sizeof(char);
758 if (nbytes / sizeof(char) != (size_t)size ||
759 nbytes + sizeof(PyStringObject) <= nbytes) {
760 PyErr_SetString(PyExc_OverflowError,
761 "repeated string is too long");
762 return NULL;
763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000764 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000765 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000766 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000768 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000769 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000770 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000771 for (i = 0; i < size; i += a->ob_size)
772 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
773 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000774 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000775}
776
777/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
778
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000779static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000780string_slice(register PyStringObject *a, register int i, register int j)
781 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782{
783 if (i < 0)
784 i = 0;
785 if (j < 0)
786 j = 0; /* Avoid signed/unsigned bug in next line */
787 if (j > a->ob_size)
788 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000789 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
790 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000791 Py_INCREF(a);
792 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
794 if (j < i)
795 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000796 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797}
798
Guido van Rossum9284a572000-03-07 15:53:43 +0000799static int
Fred Drakeba096332000-07-09 07:04:36 +0000800string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000801{
802 register char *s, *end;
803 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000804#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000805 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000806 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000807#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000808 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000809 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000810 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000811 return -1;
812 }
813 c = PyString_AsString(el)[0];
814 s = PyString_AsString(a);
815 end = s + PyString_Size(a);
816 while (s < end) {
817 if (c == *s++)
818 return 1;
819 }
820 return 0;
821}
822
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000824string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000826 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000827 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000829 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 return NULL;
831 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000832 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000833 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000834 if (v == NULL)
835 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000836 else {
837#ifdef COUNT_ALLOCS
838 one_strings++;
839#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000840 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000841 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000842 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843}
844
Martin v. Löwiscd353062001-05-24 16:56:35 +0000845static PyObject*
846string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000848 int c;
849 int len_a, len_b;
850 int min_len;
851 PyObject *result;
852
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000853 /* Make sure both arguments are strings. */
854 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000855 result = Py_NotImplemented;
856 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000857 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000858 if (a == b) {
859 switch (op) {
860 case Py_EQ:case Py_LE:case Py_GE:
861 result = Py_True;
862 goto out;
863 case Py_NE:case Py_LT:case Py_GT:
864 result = Py_False;
865 goto out;
866 }
867 }
868 if (op == Py_EQ) {
869 /* Supporting Py_NE here as well does not save
870 much time, since Py_NE is rarely used. */
871 if (a->ob_size == b->ob_size
872 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000873 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +0000874 a->ob_size) == 0)) {
875 result = Py_True;
876 } else {
877 result = Py_False;
878 }
879 goto out;
880 }
881 len_a = a->ob_size; len_b = b->ob_size;
882 min_len = (len_a < len_b) ? len_a : len_b;
883 if (min_len > 0) {
884 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
885 if (c==0)
886 c = memcmp(a->ob_sval, b->ob_sval, min_len);
887 }else
888 c = 0;
889 if (c == 0)
890 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
891 switch (op) {
892 case Py_LT: c = c < 0; break;
893 case Py_LE: c = c <= 0; break;
894 case Py_EQ: assert(0); break; /* unreachable */
895 case Py_NE: c = c != 0; break;
896 case Py_GT: c = c > 0; break;
897 case Py_GE: c = c >= 0; break;
898 default:
899 result = Py_NotImplemented;
900 goto out;
901 }
902 result = c ? Py_True : Py_False;
903 out:
904 Py_INCREF(result);
905 return result;
906}
907
908int
909_PyString_Eq(PyObject *o1, PyObject *o2)
910{
911 PyStringObject *a, *b;
912 a = (PyStringObject*)o1;
913 b = (PyStringObject*)o2;
914 return a->ob_size == b->ob_size
915 && *a->ob_sval == *b->ob_sval
916 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917}
918
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919static long
Fred Drakeba096332000-07-09 07:04:36 +0000920string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000921{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000922 register int len;
923 register unsigned char *p;
924 register long x;
925
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000926 if (a->ob_shash != -1)
927 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000928 if (a->ob_sinterned != NULL)
929 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000931 len = a->ob_size;
932 p = (unsigned char *) a->ob_sval;
933 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000935 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000936 x ^= a->ob_size;
937 if (x == -1)
938 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000939 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000940 return x;
941}
942
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000943static PyObject*
944string_subscript(PyStringObject* self, PyObject* item)
945{
946 if (PyInt_Check(item)) {
947 long i = PyInt_AS_LONG(item);
948 if (i < 0)
949 i += PyString_GET_SIZE(self);
950 return string_item(self,i);
951 }
952 else if (PyLong_Check(item)) {
953 long i = PyLong_AsLong(item);
954 if (i == -1 && PyErr_Occurred())
955 return NULL;
956 if (i < 0)
957 i += PyString_GET_SIZE(self);
958 return string_item(self,i);
959 }
960 else if (PySlice_Check(item)) {
961 int start, stop, step, slicelength, cur, i;
962 char* source_buf;
963 char* result_buf;
964 PyObject* result;
965
966 if (PySlice_GetIndicesEx((PySliceObject*)item,
967 PyString_GET_SIZE(self),
968 &start, &stop, &step, &slicelength) < 0) {
969 return NULL;
970 }
971
972 if (slicelength <= 0) {
973 return PyString_FromStringAndSize("", 0);
974 }
975 else {
976 source_buf = PyString_AsString((PyObject*)self);
977 result_buf = PyMem_Malloc(slicelength);
978
979 for (cur = start, i = 0; i < slicelength;
980 cur += step, i++) {
981 result_buf[i] = source_buf[cur];
982 }
983
984 result = PyString_FromStringAndSize(result_buf,
985 slicelength);
986 PyMem_Free(result_buf);
987 return result;
988 }
989 }
990 else {
991 PyErr_SetString(PyExc_TypeError,
992 "string indices must be integers");
993 return NULL;
994 }
995}
996
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000997static int
Fred Drakeba096332000-07-09 07:04:36 +0000998string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000999{
1000 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001001 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001002 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001003 return -1;
1004 }
1005 *ptr = (void *)self->ob_sval;
1006 return self->ob_size;
1007}
1008
1009static int
Fred Drakeba096332000-07-09 07:04:36 +00001010string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001011{
Guido van Rossum045e6881997-09-08 18:30:11 +00001012 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001013 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001014 return -1;
1015}
1016
1017static int
Fred Drakeba096332000-07-09 07:04:36 +00001018string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001019{
1020 if ( lenp )
1021 *lenp = self->ob_size;
1022 return 1;
1023}
1024
Guido van Rossum1db70701998-10-08 02:18:52 +00001025static int
Fred Drakeba096332000-07-09 07:04:36 +00001026string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001027{
1028 if ( index != 0 ) {
1029 PyErr_SetString(PyExc_SystemError,
1030 "accessing non-existent string segment");
1031 return -1;
1032 }
1033 *ptr = self->ob_sval;
1034 return self->ob_size;
1035}
1036
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001037static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001038 (inquiry)string_length, /*sq_length*/
1039 (binaryfunc)string_concat, /*sq_concat*/
1040 (intargfunc)string_repeat, /*sq_repeat*/
1041 (intargfunc)string_item, /*sq_item*/
1042 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001043 0, /*sq_ass_item*/
1044 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001045 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001046};
1047
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001048static PyMappingMethods string_as_mapping = {
1049 (inquiry)string_length,
1050 (binaryfunc)string_subscript,
1051 0,
1052};
1053
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001054static PyBufferProcs string_as_buffer = {
1055 (getreadbufferproc)string_buffer_getreadbuf,
1056 (getwritebufferproc)string_buffer_getwritebuf,
1057 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001058 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001059};
1060
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001061
1062
/* Selects which end(s) of the string a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the constants above. */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* Method name for strip type i: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001071
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001072
1073static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001074split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001075{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001076 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001077 PyObject* item;
1078 PyObject *list = PyList_New(0);
1079
1080 if (list == NULL)
1081 return NULL;
1082
Guido van Rossum4c08d552000-03-10 22:55:18 +00001083 for (i = j = 0; i < len; ) {
1084 while (i < len && isspace(Py_CHARMASK(s[i])))
1085 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087 while (i < len && !isspace(Py_CHARMASK(s[i])))
1088 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001089 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001090 if (maxsplit-- <= 0)
1091 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001092 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1093 if (item == NULL)
1094 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001095 err = PyList_Append(list, item);
1096 Py_DECREF(item);
1097 if (err < 0)
1098 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001099 while (i < len && isspace(Py_CHARMASK(s[i])))
1100 i++;
1101 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001102 }
1103 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001104 if (j < len) {
1105 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1106 if (item == NULL)
1107 goto finally;
1108 err = PyList_Append(list, item);
1109 Py_DECREF(item);
1110 if (err < 0)
1111 goto finally;
1112 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113 return list;
1114 finally:
1115 Py_DECREF(list);
1116 return NULL;
1117}
1118
1119
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001120PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001121"S.split([sep [,maxsplit]]) -> list of strings\n\
1122\n\
1123Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001124delimiter string. If maxsplit is given, at most maxsplit\n\
1125splits are done. If sep is not specified, any whitespace string\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001126is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001127
1128static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001129string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001130{
1131 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001132 int maxsplit = -1;
1133 const char *s = PyString_AS_STRING(self), *sub;
1134 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001135
Guido van Rossum4c08d552000-03-10 22:55:18 +00001136 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001137 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001138 if (maxsplit < 0)
1139 maxsplit = INT_MAX;
1140 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001142 if (PyString_Check(subobj)) {
1143 sub = PyString_AS_STRING(subobj);
1144 n = PyString_GET_SIZE(subobj);
1145 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001146#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001147 else if (PyUnicode_Check(subobj))
1148 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001149#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001150 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1151 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152 if (n == 0) {
1153 PyErr_SetString(PyExc_ValueError, "empty separator");
1154 return NULL;
1155 }
1156
1157 list = PyList_New(0);
1158 if (list == NULL)
1159 return NULL;
1160
1161 i = j = 0;
1162 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001163 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001164 if (maxsplit-- <= 0)
1165 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001166 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1167 if (item == NULL)
1168 goto fail;
1169 err = PyList_Append(list, item);
1170 Py_DECREF(item);
1171 if (err < 0)
1172 goto fail;
1173 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001174 }
1175 else
1176 i++;
1177 }
1178 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1179 if (item == NULL)
1180 goto fail;
1181 err = PyList_Append(list, item);
1182 Py_DECREF(item);
1183 if (err < 0)
1184 goto fail;
1185
1186 return list;
1187
1188 fail:
1189 Py_DECREF(list);
1190 return NULL;
1191}
1192
1193
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001194PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001195"S.join(sequence) -> string\n\
1196\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001197Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001198sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001199
1200static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001201string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202{
1203 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001204 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001205 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001206 char *p;
1207 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001208 size_t sz = 0;
1209 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001210 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001211
Tim Peters19fe14e2001-01-19 03:03:47 +00001212 seq = PySequence_Fast(orig, "");
1213 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001214 if (PyErr_ExceptionMatches(PyExc_TypeError))
1215 PyErr_Format(PyExc_TypeError,
1216 "sequence expected, %.80s found",
1217 orig->ob_type->tp_name);
1218 return NULL;
1219 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001220
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001221 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001222 if (seqlen == 0) {
1223 Py_DECREF(seq);
1224 return PyString_FromString("");
1225 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001226 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001227 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001228 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1229 PyErr_Format(PyExc_TypeError,
1230 "sequence item 0: expected string,"
1231 " %.80s found",
1232 item->ob_type->tp_name);
1233 Py_DECREF(seq);
1234 return NULL;
1235 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001236 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001237 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001238 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001239 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001240
Tim Peters19fe14e2001-01-19 03:03:47 +00001241 /* There are at least two things to join. Do a pre-pass to figure out
1242 * the total amount of space we'll need (sz), see whether any argument
1243 * is absurd, and defer to the Unicode join if appropriate.
1244 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001245 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001246 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001247 item = PySequence_Fast_GET_ITEM(seq, i);
1248 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001249#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001250 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001251 /* Defer to Unicode join.
1252 * CAUTION: There's no gurantee that the
1253 * original sequence can be iterated over
1254 * again, so we must pass seq here.
1255 */
1256 PyObject *result;
1257 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001258 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001259 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001260 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001261#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001262 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001263 "sequence item %i: expected string,"
1264 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001265 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001266 Py_DECREF(seq);
1267 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001268 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001269 sz += PyString_GET_SIZE(item);
1270 if (i != 0)
1271 sz += seplen;
1272 if (sz < old_sz || sz > INT_MAX) {
1273 PyErr_SetString(PyExc_OverflowError,
1274 "join() is too long for a Python string");
1275 Py_DECREF(seq);
1276 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001277 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001278 }
1279
1280 /* Allocate result space. */
1281 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1282 if (res == NULL) {
1283 Py_DECREF(seq);
1284 return NULL;
1285 }
1286
1287 /* Catenate everything. */
1288 p = PyString_AS_STRING(res);
1289 for (i = 0; i < seqlen; ++i) {
1290 size_t n;
1291 item = PySequence_Fast_GET_ITEM(seq, i);
1292 n = PyString_GET_SIZE(item);
1293 memcpy(p, PyString_AS_STRING(item), n);
1294 p += n;
1295 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001296 memcpy(p, sep, seplen);
1297 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001298 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001300
Jeremy Hylton49048292000-07-11 03:28:17 +00001301 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303}
1304
Tim Peters52e155e2001-06-16 05:42:57 +00001305PyObject *
1306_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001307{
Tim Petersa7259592001-06-16 05:11:17 +00001308 assert(sep != NULL && PyString_Check(sep));
1309 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001310 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001311}
1312
/* Normalise a start/end pair in place against a string of length len.
 *
 * An end beyond len is clipped to len; a negative start or end has len
 * added to it and, if still negative, becomes 0.  A start greater than
 * len is left unchanged.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int bound;

	/* End first, then start -- the same order as the stores. */
	bound = *end;
	if (bound > len)
		bound = len;
	else if (bound < 0)
		bound += len;
	if (bound < 0)
		bound = 0;
	*end = bound;

	bound = *start;
	if (bound < 0)
		bound += len;
	if (bound < 0)
		bound = 0;
	*start = bound;
}
1327
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328static long
Fred Drakeba096332000-07-09 07:04:36 +00001329string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001331 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332 int len = PyString_GET_SIZE(self);
1333 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001334 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001336 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001337 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001338 return -2;
1339 if (PyString_Check(subobj)) {
1340 sub = PyString_AS_STRING(subobj);
1341 n = PyString_GET_SIZE(subobj);
1342 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001343#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 else if (PyUnicode_Check(subobj))
1345 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001346#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001347 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348 return -2;
1349
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001350 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001351
Guido van Rossum4c08d552000-03-10 22:55:18 +00001352 if (dir > 0) {
1353 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 last -= n;
1356 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001357 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 return (long)i;
1359 }
1360 else {
1361 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001362
Guido van Rossum4c08d552000-03-10 22:55:18 +00001363 if (n == 0 && i <= last)
1364 return (long)last;
1365 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001366 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 return (long)j;
1368 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001369
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370 return -1;
1371}
1372
1373
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001374PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375"S.find(sub [,start [,end]]) -> int\n\
1376\n\
1377Return the lowest index in S where substring sub is found,\n\
1378such that sub is contained within s[start,end]. Optional\n\
1379arguments start and end are interpreted as in slice notation.\n\
1380\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001381Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382
1383static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001384string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001386 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387 if (result == -2)
1388 return NULL;
1389 return PyInt_FromLong(result);
1390}
1391
1392
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001393PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001394"S.index(sub [,start [,end]]) -> int\n\
1395\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001396Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001397
1398static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001399string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001401 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 if (result == -2)
1403 return NULL;
1404 if (result == -1) {
1405 PyErr_SetString(PyExc_ValueError,
1406 "substring not found in string.index");
1407 return NULL;
1408 }
1409 return PyInt_FromLong(result);
1410}
1411
1412
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001413PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414"S.rfind(sub [,start [,end]]) -> int\n\
1415\n\
1416Return the highest index in S where substring sub is found,\n\
1417such that sub is contained within s[start,end]. Optional\n\
1418arguments start and end are interpreted as in slice notation.\n\
1419\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001420Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421
1422static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001423string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001425 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426 if (result == -2)
1427 return NULL;
1428 return PyInt_FromLong(result);
1429}
1430
1431
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001432PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001433"S.rindex(sub [,start [,end]]) -> int\n\
1434\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001435Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436
1437static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001438string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001440 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441 if (result == -2)
1442 return NULL;
1443 if (result == -1) {
1444 PyErr_SetString(PyExc_ValueError,
1445 "substring not found in string.rindex");
1446 return NULL;
1447 }
1448 return PyInt_FromLong(result);
1449}
1450
1451
1452static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001453do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1454{
1455 char *s = PyString_AS_STRING(self);
1456 int len = PyString_GET_SIZE(self);
1457 char *sep = PyString_AS_STRING(sepobj);
1458 int seplen = PyString_GET_SIZE(sepobj);
1459 int i, j;
1460
1461 i = 0;
1462 if (striptype != RIGHTSTRIP) {
1463 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1464 i++;
1465 }
1466 }
1467
1468 j = len;
1469 if (striptype != LEFTSTRIP) {
1470 do {
1471 j--;
1472 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1473 j++;
1474 }
1475
1476 if (i == 0 && j == len && PyString_CheckExact(self)) {
1477 Py_INCREF(self);
1478 return (PyObject*)self;
1479 }
1480 else
1481 return PyString_FromStringAndSize(s+i, j-i);
1482}
1483
1484
1485static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001486do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487{
1488 char *s = PyString_AS_STRING(self);
1489 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 i = 0;
1492 if (striptype != RIGHTSTRIP) {
1493 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1494 i++;
1495 }
1496 }
1497
1498 j = len;
1499 if (striptype != LEFTSTRIP) {
1500 do {
1501 j--;
1502 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1503 j++;
1504 }
1505
Tim Peters8fa5dd02001-09-12 02:18:30 +00001506 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 Py_INCREF(self);
1508 return (PyObject*)self;
1509 }
1510 else
1511 return PyString_FromStringAndSize(s+i, j-i);
1512}
1513
1514
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001515static PyObject *
1516do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1517{
1518 PyObject *sep = NULL;
1519
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001520 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001521 return NULL;
1522
1523 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001524 if (PyString_Check(sep))
1525 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001526#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001527 else if (PyUnicode_Check(sep)) {
1528 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1529 PyObject *res;
1530 if (uniself==NULL)
1531 return NULL;
1532 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1533 striptype, sep);
1534 Py_DECREF(uniself);
1535 return res;
1536 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001537#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001538 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001539 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001540#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001541 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001542#else
1543 "%s arg must be None or str",
1544#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001545 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001546 return NULL;
1547 }
1548 return do_xstrip(self, striptype, sep);
1549 }
1550
1551 return do_strip(self, striptype);
1552}
1553
1554
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001555PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001556"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001557\n\
1558Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001559whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001560If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001561If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001562
1563static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001564string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001566 if (PyTuple_GET_SIZE(args) == 0)
1567 return do_strip(self, BOTHSTRIP); /* Common case */
1568 else
1569 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570}
1571
1572
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001573PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001574"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001576Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001577If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001578If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579
1580static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001581string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001583 if (PyTuple_GET_SIZE(args) == 0)
1584 return do_strip(self, LEFTSTRIP); /* Common case */
1585 else
1586 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001587}
1588
1589
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001590PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001591"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001593Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001594If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001595If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001596
1597static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001598string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001600 if (PyTuple_GET_SIZE(args) == 0)
1601 return do_strip(self, RIGHTSTRIP); /* Common case */
1602 else
1603 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604}
1605
1606
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001607PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608"S.lower() -> string\n\
1609\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001610Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611
1612static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001613string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614{
1615 char *s = PyString_AS_STRING(self), *s_new;
1616 int i, n = PyString_GET_SIZE(self);
1617 PyObject *new;
1618
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 new = PyString_FromStringAndSize(NULL, n);
1620 if (new == NULL)
1621 return NULL;
1622 s_new = PyString_AsString(new);
1623 for (i = 0; i < n; i++) {
1624 int c = Py_CHARMASK(*s++);
1625 if (isupper(c)) {
1626 *s_new = tolower(c);
1627 } else
1628 *s_new = c;
1629 s_new++;
1630 }
1631 return new;
1632}
1633
1634
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001635PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636"S.upper() -> string\n\
1637\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001638Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639
1640static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001641string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642{
1643 char *s = PyString_AS_STRING(self), *s_new;
1644 int i, n = PyString_GET_SIZE(self);
1645 PyObject *new;
1646
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647 new = PyString_FromStringAndSize(NULL, n);
1648 if (new == NULL)
1649 return NULL;
1650 s_new = PyString_AsString(new);
1651 for (i = 0; i < n; i++) {
1652 int c = Py_CHARMASK(*s++);
1653 if (islower(c)) {
1654 *s_new = toupper(c);
1655 } else
1656 *s_new = c;
1657 s_new++;
1658 }
1659 return new;
1660}
1661
1662
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001663PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001664"S.title() -> string\n\
1665\n\
1666Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001667characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001668
1669static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001670string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671{
1672 char *s = PyString_AS_STRING(self), *s_new;
1673 int i, n = PyString_GET_SIZE(self);
1674 int previous_is_cased = 0;
1675 PyObject *new;
1676
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 new = PyString_FromStringAndSize(NULL, n);
1678 if (new == NULL)
1679 return NULL;
1680 s_new = PyString_AsString(new);
1681 for (i = 0; i < n; i++) {
1682 int c = Py_CHARMASK(*s++);
1683 if (islower(c)) {
1684 if (!previous_is_cased)
1685 c = toupper(c);
1686 previous_is_cased = 1;
1687 } else if (isupper(c)) {
1688 if (previous_is_cased)
1689 c = tolower(c);
1690 previous_is_cased = 1;
1691 } else
1692 previous_is_cased = 0;
1693 *s_new++ = c;
1694 }
1695 return new;
1696}
1697
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001698PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699"S.capitalize() -> string\n\
1700\n\
1701Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001702capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703
1704static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001705string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706{
1707 char *s = PyString_AS_STRING(self), *s_new;
1708 int i, n = PyString_GET_SIZE(self);
1709 PyObject *new;
1710
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711 new = PyString_FromStringAndSize(NULL, n);
1712 if (new == NULL)
1713 return NULL;
1714 s_new = PyString_AsString(new);
1715 if (0 < n) {
1716 int c = Py_CHARMASK(*s++);
1717 if (islower(c))
1718 *s_new = toupper(c);
1719 else
1720 *s_new = c;
1721 s_new++;
1722 }
1723 for (i = 1; i < n; i++) {
1724 int c = Py_CHARMASK(*s++);
1725 if (isupper(c))
1726 *s_new = tolower(c);
1727 else
1728 *s_new = c;
1729 s_new++;
1730 }
1731 return new;
1732}
1733
1734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001735PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736"S.count(sub[, start[, end]]) -> int\n\
1737\n\
1738Return the number of occurrences of substring sub in string\n\
1739S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001740interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741
1742static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001743string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746 int len = PyString_GET_SIZE(self), n;
1747 int i = 0, last = INT_MAX;
1748 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001749 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750
Guido van Rossumc6821402000-05-08 14:08:05 +00001751 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1752 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001753 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001754
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 if (PyString_Check(subobj)) {
1756 sub = PyString_AS_STRING(subobj);
1757 n = PyString_GET_SIZE(subobj);
1758 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001759#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001760 else if (PyUnicode_Check(subobj)) {
1761 int count;
1762 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1763 if (count == -1)
1764 return NULL;
1765 else
1766 return PyInt_FromLong((long) count);
1767 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001768#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001769 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1770 return NULL;
1771
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001772 string_adjust_indices(&i, &last, len);
1773
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774 m = last + 1 - n;
1775 if (n == 0)
1776 return PyInt_FromLong((long) (m-i));
1777
1778 r = 0;
1779 while (i < m) {
1780 if (!memcmp(s+i, sub, n)) {
1781 r++;
1782 i += n;
1783 } else {
1784 i++;
1785 }
1786 }
1787 return PyInt_FromLong((long) r);
1788}
1789
1790
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001791PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792"S.swapcase() -> string\n\
1793\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001794Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001795converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796
1797static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001798string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799{
1800 char *s = PyString_AS_STRING(self), *s_new;
1801 int i, n = PyString_GET_SIZE(self);
1802 PyObject *new;
1803
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 new = PyString_FromStringAndSize(NULL, n);
1805 if (new == NULL)
1806 return NULL;
1807 s_new = PyString_AsString(new);
1808 for (i = 0; i < n; i++) {
1809 int c = Py_CHARMASK(*s++);
1810 if (islower(c)) {
1811 *s_new = toupper(c);
1812 }
1813 else if (isupper(c)) {
1814 *s_new = tolower(c);
1815 }
1816 else
1817 *s_new = c;
1818 s_new++;
1819 }
1820 return new;
1821}
1822
1823
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001824PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825"S.translate(table [,deletechars]) -> string\n\
1826\n\
1827Return a copy of the string S, where all characters occurring\n\
1828in the optional argument deletechars are removed, and the\n\
1829remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001830translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831
1832static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001833string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001835 register char *input, *output;
1836 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001837 register int i, c, changed = 0;
1838 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001839 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840 int inlen, tablen, dellen = 0;
1841 PyObject *result;
1842 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001843 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844
Guido van Rossum4c08d552000-03-10 22:55:18 +00001845 if (!PyArg_ParseTuple(args, "O|O:translate",
1846 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001848
1849 if (PyString_Check(tableobj)) {
1850 table1 = PyString_AS_STRING(tableobj);
1851 tablen = PyString_GET_SIZE(tableobj);
1852 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001853#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001854 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001855 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001856 parameter; instead a mapping to None will cause characters
1857 to be deleted. */
1858 if (delobj != NULL) {
1859 PyErr_SetString(PyExc_TypeError,
1860 "deletions are implemented differently for unicode");
1861 return NULL;
1862 }
1863 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1864 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001865#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868
1869 if (delobj != NULL) {
1870 if (PyString_Check(delobj)) {
1871 del_table = PyString_AS_STRING(delobj);
1872 dellen = PyString_GET_SIZE(delobj);
1873 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001874#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001875 else if (PyUnicode_Check(delobj)) {
1876 PyErr_SetString(PyExc_TypeError,
1877 "deletions are implemented differently for unicode");
1878 return NULL;
1879 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001880#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001881 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1882 return NULL;
1883
1884 if (tablen != 256) {
1885 PyErr_SetString(PyExc_ValueError,
1886 "translation table must be 256 characters long");
1887 return NULL;
1888 }
1889 }
1890 else {
1891 del_table = NULL;
1892 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893 }
1894
1895 table = table1;
1896 inlen = PyString_Size(input_obj);
1897 result = PyString_FromStringAndSize((char *)NULL, inlen);
1898 if (result == NULL)
1899 return NULL;
1900 output_start = output = PyString_AsString(result);
1901 input = PyString_AsString(input_obj);
1902
1903 if (dellen == 0) {
1904 /* If no deletions are required, use faster code */
1905 for (i = inlen; --i >= 0; ) {
1906 c = Py_CHARMASK(*input++);
1907 if (Py_CHARMASK((*output++ = table[c])) != c)
1908 changed = 1;
1909 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001910 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911 return result;
1912 Py_DECREF(result);
1913 Py_INCREF(input_obj);
1914 return input_obj;
1915 }
1916
1917 for (i = 0; i < 256; i++)
1918 trans_table[i] = Py_CHARMASK(table[i]);
1919
1920 for (i = 0; i < dellen; i++)
1921 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1922
1923 for (i = inlen; --i >= 0; ) {
1924 c = Py_CHARMASK(*input++);
1925 if (trans_table[c] != -1)
1926 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1927 continue;
1928 changed = 1;
1929 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001930 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931 Py_DECREF(result);
1932 Py_INCREF(input_obj);
1933 return input_obj;
1934 }
1935 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001936 if (inlen > 0)
1937 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938 return result;
1939}
1940
1941
1942/* What follows is used for implementing replace(). Perry Stoll. */
1943
/*
  mymemfind

  strstr() work-alike for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1 if
  there is none.  When PAT is longer than MEM the result is always -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot begin in the final pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1969
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from the
  left and resuming each search just past the previous match.
  For example mem=1111 with pat=11 gives 2, and mem=11111 with pat=11
  also gives 2.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int matches = 0;

    while (len >= 0) {
        const int hit = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (hit == -1)
            break;
        /* Skip everything up to and including this match. */
        consumed = hit + pat_len;
        mem += consumed;
        len -= consumed;
        matches++;
    }
    return matches;
}
1993
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a negative
   COUNT means replace all of them.

   If STR is empty, PAT is longer than STR, or no occurrence is to be
   replaced, then the original string is returned.  Otherwise, a new
   string is allocated here with PyMem_MALLOC and returned; the caller
   must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* The result only grows when SUB is longer than PAT; in that case make
       sure len + nfound*(sub_len - pat_len) cannot overflow an int. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2078
2079
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002080PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081"S.replace (old, new[, maxsplit]) -> string\n\
2082\n\
2083Return a copy of string S with all occurrences of substring\n\
2084old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002085given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086
2087static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002088string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002090 const char *str = PyString_AS_STRING(self), *sub, *repl;
2091 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002092 const int len = PyString_GET_SIZE(self);
2093 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002094 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 if (!PyArg_ParseTuple(args, "OO|i:replace",
2099 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101
2102 if (PyString_Check(subobj)) {
2103 sub = PyString_AS_STRING(subobj);
2104 sub_len = PyString_GET_SIZE(subobj);
2105 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002106#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002108 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002110#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2112 return NULL;
2113
2114 if (PyString_Check(replobj)) {
2115 repl = PyString_AS_STRING(replobj);
2116 repl_len = PyString_GET_SIZE(replobj);
2117 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002118#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002119 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002120 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002121 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002122#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002123 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2124 return NULL;
2125
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002126 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002127 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 return NULL;
2129 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131 if (new_s == NULL) {
2132 PyErr_NoMemory();
2133 return NULL;
2134 }
2135 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002136 if (PyString_CheckExact(self)) {
2137 /* we're returning another reference to self */
2138 new = (PyObject*)self;
2139 Py_INCREF(new);
2140 }
2141 else {
2142 new = PyString_FromStringAndSize(str, len);
2143 if (new == NULL)
2144 return NULL;
2145 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146 }
2147 else {
2148 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002149 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150 }
2151 return new;
2152}
2153
2154
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002155PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002156"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002158Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002160comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161
2162static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002163string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002165 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002167 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168 int plen;
2169 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002170 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172
Guido van Rossumc6821402000-05-08 14:08:05 +00002173 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2174 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175 return NULL;
2176 if (PyString_Check(subobj)) {
2177 prefix = PyString_AS_STRING(subobj);
2178 plen = PyString_GET_SIZE(subobj);
2179 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002180#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002181 else if (PyUnicode_Check(subobj)) {
2182 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002183 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002184 subobj, start, end, -1);
2185 if (rc == -1)
2186 return NULL;
2187 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002188 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002189 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002190#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192 return NULL;
2193
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002194 string_adjust_indices(&start, &end, len);
2195
2196 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002197 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002199 if (end-start >= plen)
2200 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2201 else
2202 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203}
2204
2205
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002206PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002207"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002209Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002211comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212
2213static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002214string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002215{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002218 const char* suffix;
2219 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002220 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002221 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223
Guido van Rossumc6821402000-05-08 14:08:05 +00002224 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2225 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 return NULL;
2227 if (PyString_Check(subobj)) {
2228 suffix = PyString_AS_STRING(subobj);
2229 slen = PyString_GET_SIZE(subobj);
2230 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002231#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002232 else if (PyUnicode_Check(subobj)) {
2233 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002234 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002235 subobj, start, end, +1);
2236 if (rc == -1)
2237 return NULL;
2238 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002239 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002240 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002241#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243 return NULL;
2244
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002245 string_adjust_indices(&start, &end, len);
2246
2247 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002248 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002249
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002250 if (end-slen > start)
2251 start = end - slen;
2252 if (end-start >= slen)
2253 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2254 else
2255 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256}
2257
2258
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002259PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002260"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002261\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002262Encodes S using the codec registered for encoding. encoding defaults\n\
2263to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002264handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002265a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002266
2267static PyObject *
2268string_encode(PyStringObject *self, PyObject *args)
2269{
2270 char *encoding = NULL;
2271 char *errors = NULL;
2272 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2273 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002274 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2275}
2276
2277
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002278PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002279"S.decode([encoding[,errors]]) -> object\n\
2280\n\
2281Decodes S using the codec registered for encoding. encoding defaults\n\
2282to the default encoding. errors may be given to set a different error\n\
2283handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002284a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002285
2286static PyObject *
2287string_decode(PyStringObject *self, PyObject *args)
2288{
2289 char *encoding = NULL;
2290 char *errors = NULL;
2291 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2292 return NULL;
2293 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002294}
2295
2296
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002297PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002298"S.expandtabs([tabsize]) -> string\n\
2299\n\
2300Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002301If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302
2303static PyObject*
2304string_expandtabs(PyStringObject *self, PyObject *args)
2305{
2306 const char *e, *p;
2307 char *q;
2308 int i, j;
2309 PyObject *u;
2310 int tabsize = 8;
2311
2312 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2313 return NULL;
2314
Thomas Wouters7e474022000-07-16 12:04:32 +00002315 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002316 i = j = 0;
2317 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2318 for (p = PyString_AS_STRING(self); p < e; p++)
2319 if (*p == '\t') {
2320 if (tabsize > 0)
2321 j += tabsize - (j % tabsize);
2322 }
2323 else {
2324 j++;
2325 if (*p == '\n' || *p == '\r') {
2326 i += j;
2327 j = 0;
2328 }
2329 }
2330
2331 /* Second pass: create output string and fill it */
2332 u = PyString_FromStringAndSize(NULL, i + j);
2333 if (!u)
2334 return NULL;
2335
2336 j = 0;
2337 q = PyString_AS_STRING(u);
2338
2339 for (p = PyString_AS_STRING(self); p < e; p++)
2340 if (*p == '\t') {
2341 if (tabsize > 0) {
2342 i = tabsize - (j % tabsize);
2343 j += i;
2344 while (i--)
2345 *q++ = ' ';
2346 }
2347 }
2348 else {
2349 j++;
2350 *q++ = *p;
2351 if (*p == '\n' || *p == '\r')
2352 j = 0;
2353 }
2354
2355 return u;
2356}
2357
Tim Peters8fa5dd02001-09-12 02:18:30 +00002358static PyObject *
2359pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002360{
2361 PyObject *u;
2362
2363 if (left < 0)
2364 left = 0;
2365 if (right < 0)
2366 right = 0;
2367
Tim Peters8fa5dd02001-09-12 02:18:30 +00002368 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 Py_INCREF(self);
2370 return (PyObject *)self;
2371 }
2372
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002373 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 left + PyString_GET_SIZE(self) + right);
2375 if (u) {
2376 if (left)
2377 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002378 memcpy(PyString_AS_STRING(u) + left,
2379 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380 PyString_GET_SIZE(self));
2381 if (right)
2382 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2383 fill, right);
2384 }
2385
2386 return u;
2387}
2388
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002389PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002390"S.ljust(width) -> string\n"
2391"\n"
2392"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002393"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002394
2395static PyObject *
2396string_ljust(PyStringObject *self, PyObject *args)
2397{
2398 int width;
2399 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2400 return NULL;
2401
Tim Peters8fa5dd02001-09-12 02:18:30 +00002402 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002403 Py_INCREF(self);
2404 return (PyObject*) self;
2405 }
2406
2407 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2408}
2409
2410
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002411PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002412"S.rjust(width) -> string\n"
2413"\n"
2414"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002415"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002416
2417static PyObject *
2418string_rjust(PyStringObject *self, PyObject *args)
2419{
2420 int width;
2421 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2422 return NULL;
2423
Tim Peters8fa5dd02001-09-12 02:18:30 +00002424 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002425 Py_INCREF(self);
2426 return (PyObject*) self;
2427 }
2428
2429 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2430}
2431
2432
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002433PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002434"S.center(width) -> string\n"
2435"\n"
2436"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002437"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002438
2439static PyObject *
2440string_center(PyStringObject *self, PyObject *args)
2441{
2442 int marg, left;
2443 int width;
2444
2445 if (!PyArg_ParseTuple(args, "i:center", &width))
2446 return NULL;
2447
Tim Peters8fa5dd02001-09-12 02:18:30 +00002448 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449 Py_INCREF(self);
2450 return (PyObject*) self;
2451 }
2452
2453 marg = width - PyString_GET_SIZE(self);
2454 left = marg / 2 + (marg & width & 1);
2455
2456 return pad(self, left, marg - left, ' ');
2457}
2458
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002459PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002460"S.zfill(width) -> string\n"
2461"\n"
2462"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002463"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002464
2465static PyObject *
2466string_zfill(PyStringObject *self, PyObject *args)
2467{
2468 int fill;
2469 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002470 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002471
2472 int width;
2473 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2474 return NULL;
2475
2476 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002477 if (PyString_CheckExact(self)) {
2478 Py_INCREF(self);
2479 return (PyObject*) self;
2480 }
2481 else
2482 return PyString_FromStringAndSize(
2483 PyString_AS_STRING(self),
2484 PyString_GET_SIZE(self)
2485 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002486 }
2487
2488 fill = width - PyString_GET_SIZE(self);
2489
2490 s = pad(self, fill, 0, '0');
2491
2492 if (s == NULL)
2493 return NULL;
2494
2495 p = PyString_AS_STRING(s);
2496 if (p[fill] == '+' || p[fill] == '-') {
2497 /* move sign to beginning of string */
2498 p[0] = p[fill];
2499 p[fill] = '0';
2500 }
2501
2502 return (PyObject*) s;
2503}
2504
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002505PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002506"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002507"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002508"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002509"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002510
2511static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002512string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513{
Fred Drakeba096332000-07-09 07:04:36 +00002514 register const unsigned char *p
2515 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002516 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517
Guido van Rossum4c08d552000-03-10 22:55:18 +00002518 /* Shortcut for single character strings */
2519 if (PyString_GET_SIZE(self) == 1 &&
2520 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002521 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002523 /* Special case for empty strings */
2524 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002525 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002526
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527 e = p + PyString_GET_SIZE(self);
2528 for (; p < e; p++) {
2529 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002530 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002531 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002532 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533}
2534
2535
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002536PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002537"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002538\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002539Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002540and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002541
2542static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002543string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002544{
Fred Drakeba096332000-07-09 07:04:36 +00002545 register const unsigned char *p
2546 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002547 register const unsigned char *e;
2548
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002549 /* Shortcut for single character strings */
2550 if (PyString_GET_SIZE(self) == 1 &&
2551 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002552 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002553
2554 /* Special case for empty strings */
2555 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002556 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002557
2558 e = p + PyString_GET_SIZE(self);
2559 for (; p < e; p++) {
2560 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002561 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002562 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002563 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002564}
2565
2566
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002567PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002568"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002569\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002570Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002571and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002572
2573static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002574string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002575{
Fred Drakeba096332000-07-09 07:04:36 +00002576 register const unsigned char *p
2577 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002578 register const unsigned char *e;
2579
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002580 /* Shortcut for single character strings */
2581 if (PyString_GET_SIZE(self) == 1 &&
2582 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002583 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002584
2585 /* Special case for empty strings */
2586 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002587 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002588
2589 e = p + PyString_GET_SIZE(self);
2590 for (; p < e; p++) {
2591 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002592 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002593 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002594 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002595}
2596
2597
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002598PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002599"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002600\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002601Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002602False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002603
2604static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002605string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606{
Fred Drakeba096332000-07-09 07:04:36 +00002607 register const unsigned char *p
2608 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002609 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002610
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611 /* Shortcut for single character strings */
2612 if (PyString_GET_SIZE(self) == 1 &&
2613 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002614 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002615
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002616 /* Special case for empty strings */
2617 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002618 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002619
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 e = p + PyString_GET_SIZE(self);
2621 for (; p < e; p++) {
2622 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002623 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002625 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002626}
2627
2628
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002629PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002630"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002631\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002632Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002633at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002634
2635static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002636string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002637{
Fred Drakeba096332000-07-09 07:04:36 +00002638 register const unsigned char *p
2639 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002640 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641 int cased;
2642
Guido van Rossum4c08d552000-03-10 22:55:18 +00002643 /* Shortcut for single character strings */
2644 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002645 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002647 /* Special case for empty strings */
2648 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002649 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002650
Guido van Rossum4c08d552000-03-10 22:55:18 +00002651 e = p + PyString_GET_SIZE(self);
2652 cased = 0;
2653 for (; p < e; p++) {
2654 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002655 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002656 else if (!cased && islower(*p))
2657 cased = 1;
2658 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002659 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002660}
2661
2662
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002663PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002664"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002665\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002666Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002667at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002668
2669static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002670string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002671{
Fred Drakeba096332000-07-09 07:04:36 +00002672 register const unsigned char *p
2673 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002674 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002675 int cased;
2676
Guido van Rossum4c08d552000-03-10 22:55:18 +00002677 /* Shortcut for single character strings */
2678 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002679 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002680
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002681 /* Special case for empty strings */
2682 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002683 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002684
Guido van Rossum4c08d552000-03-10 22:55:18 +00002685 e = p + PyString_GET_SIZE(self);
2686 cased = 0;
2687 for (; p < e; p++) {
2688 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002689 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002690 else if (!cased && isupper(*p))
2691 cased = 1;
2692 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002693 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002694}
2695
2696
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002697PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002698"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002699\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002700Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002701may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002702ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002703
2704static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002705string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002706{
Fred Drakeba096332000-07-09 07:04:36 +00002707 register const unsigned char *p
2708 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002709 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002710 int cased, previous_is_cased;
2711
Guido van Rossum4c08d552000-03-10 22:55:18 +00002712 /* Shortcut for single character strings */
2713 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002714 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002715
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002716 /* Special case for empty strings */
2717 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002718 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002719
Guido van Rossum4c08d552000-03-10 22:55:18 +00002720 e = p + PyString_GET_SIZE(self);
2721 cased = 0;
2722 previous_is_cased = 0;
2723 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002724 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002725
2726 if (isupper(ch)) {
2727 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002728 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002729 previous_is_cased = 1;
2730 cased = 1;
2731 }
2732 else if (islower(ch)) {
2733 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002734 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002735 previous_is_cased = 1;
2736 cased = 1;
2737 }
2738 else
2739 previous_is_cased = 0;
2740 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002741 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002742}
2743
2744
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002745PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002746"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002747\n\
2748Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002749Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002750is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002751
2752#define SPLIT_APPEND(data, left, right) \
2753 str = PyString_FromStringAndSize(data + left, right - left); \
2754 if (!str) \
2755 goto onError; \
2756 if (PyList_Append(list, str)) { \
2757 Py_DECREF(str); \
2758 goto onError; \
2759 } \
2760 else \
2761 Py_DECREF(str);
2762
2763static PyObject*
2764string_splitlines(PyStringObject *self, PyObject *args)
2765{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002766 register int i;
2767 register int j;
2768 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002769 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002770 PyObject *list;
2771 PyObject *str;
2772 char *data;
2773
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002774 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002775 return NULL;
2776
2777 data = PyString_AS_STRING(self);
2778 len = PyString_GET_SIZE(self);
2779
Guido van Rossum4c08d552000-03-10 22:55:18 +00002780 list = PyList_New(0);
2781 if (!list)
2782 goto onError;
2783
2784 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002785 int eol;
2786
Guido van Rossum4c08d552000-03-10 22:55:18 +00002787 /* Find a line and append it */
2788 while (i < len && data[i] != '\n' && data[i] != '\r')
2789 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002790
2791 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002792 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002793 if (i < len) {
2794 if (data[i] == '\r' && i + 1 < len &&
2795 data[i+1] == '\n')
2796 i += 2;
2797 else
2798 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002799 if (keepends)
2800 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002801 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002802 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002803 j = i;
2804 }
2805 if (j < len) {
2806 SPLIT_APPEND(data, j, len);
2807 }
2808
2809 return list;
2810
2811 onError:
2812 Py_DECREF(list);
2813 return NULL;
2814}
2815
2816#undef SPLIT_APPEND
2817
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002818
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002819static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002820string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002821 /* Counterparts of the obsolete stropmodule functions; except
2822 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002823 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2824 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2825 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2826 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002827 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2828 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2829 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2830 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2831 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2832 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2833 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002834 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
2835 capitalize__doc__},
2836 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2837 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2838 endswith__doc__},
2839 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2840 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2841 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2842 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2843 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2844 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2845 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2846 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2847 startswith__doc__},
2848 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2849 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
2850 swapcase__doc__},
2851 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2852 translate__doc__},
2853 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2854 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2855 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2856 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2857 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
2858 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2859 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2860 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
2861 expandtabs__doc__},
2862 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
2863 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002864 {NULL, NULL} /* sentinel */
2865};
2866
Jeremy Hylton938ace62002-07-17 16:30:39 +00002867static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002868str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2869
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002870static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002871string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002872{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002873 PyObject *x = NULL;
2874 static char *kwlist[] = {"object", 0};
2875
Guido van Rossumae960af2001-08-30 03:11:59 +00002876 if (type != &PyString_Type)
2877 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002878 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2879 return NULL;
2880 if (x == NULL)
2881 return PyString_FromString("");
2882 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002883}
2884
Guido van Rossumae960af2001-08-30 03:11:59 +00002885static PyObject *
2886str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2887{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002888 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002889 int n;
2890
2891 assert(PyType_IsSubtype(type, &PyString_Type));
2892 tmp = string_new(&PyString_Type, args, kwds);
2893 if (tmp == NULL)
2894 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002895 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002896 n = PyString_GET_SIZE(tmp);
2897 pnew = type->tp_alloc(type, n);
2898 if (pnew != NULL) {
2899 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002900 ((PyStringObject *)pnew)->ob_shash =
2901 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002902 ((PyStringObject *)pnew)->ob_sinterned =
2903 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002904 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002905 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002906 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002907}
2908
Guido van Rossumcacfc072002-05-24 19:01:59 +00002909static PyObject *
2910basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2911{
2912 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002913 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00002914 return NULL;
2915}
2916
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002917PyDoc_STRVAR(basestring_doc,
2918"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00002919
2920PyTypeObject PyBaseString_Type = {
2921 PyObject_HEAD_INIT(&PyType_Type)
2922 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002923 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00002924 0,
2925 0,
2926 0, /* tp_dealloc */
2927 0, /* tp_print */
2928 0, /* tp_getattr */
2929 0, /* tp_setattr */
2930 0, /* tp_compare */
2931 0, /* tp_repr */
2932 0, /* tp_as_number */
2933 0, /* tp_as_sequence */
2934 0, /* tp_as_mapping */
2935 0, /* tp_hash */
2936 0, /* tp_call */
2937 0, /* tp_str */
2938 0, /* tp_getattro */
2939 0, /* tp_setattro */
2940 0, /* tp_as_buffer */
2941 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
2942 basestring_doc, /* tp_doc */
2943 0, /* tp_traverse */
2944 0, /* tp_clear */
2945 0, /* tp_richcompare */
2946 0, /* tp_weaklistoffset */
2947 0, /* tp_iter */
2948 0, /* tp_iternext */
2949 0, /* tp_methods */
2950 0, /* tp_members */
2951 0, /* tp_getset */
2952 &PyBaseObject_Type, /* tp_base */
2953 0, /* tp_dict */
2954 0, /* tp_descr_get */
2955 0, /* tp_descr_set */
2956 0, /* tp_dictoffset */
2957 0, /* tp_init */
2958 0, /* tp_alloc */
2959 basestring_new, /* tp_new */
2960 0, /* tp_free */
2961};
2962
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002963PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002964"str(object) -> string\n\
2965\n\
2966Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002967If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002968
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002969PyTypeObject PyString_Type = {
2970 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002971 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002972 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002973 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002974 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002975 (destructor)string_dealloc, /* tp_dealloc */
2976 (printfunc)string_print, /* tp_print */
2977 0, /* tp_getattr */
2978 0, /* tp_setattr */
2979 0, /* tp_compare */
2980 (reprfunc)string_repr, /* tp_repr */
2981 0, /* tp_as_number */
2982 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00002983 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002984 (hashfunc)string_hash, /* tp_hash */
2985 0, /* tp_call */
2986 (reprfunc)string_str, /* tp_str */
2987 PyObject_GenericGetAttr, /* tp_getattro */
2988 0, /* tp_setattro */
2989 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002990 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002991 string_doc, /* tp_doc */
2992 0, /* tp_traverse */
2993 0, /* tp_clear */
2994 (richcmpfunc)string_richcompare, /* tp_richcompare */
2995 0, /* tp_weaklistoffset */
2996 0, /* tp_iter */
2997 0, /* tp_iternext */
2998 string_methods, /* tp_methods */
2999 0, /* tp_members */
3000 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003001 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003002 0, /* tp_dict */
3003 0, /* tp_descr_get */
3004 0, /* tp_descr_set */
3005 0, /* tp_dictoffset */
3006 0, /* tp_init */
3007 0, /* tp_alloc */
3008 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003009 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003010};
3011
3012void
Fred Drakeba096332000-07-09 07:04:36 +00003013PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003014{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003015 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003016 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003017 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003018 if (w == NULL || !PyString_Check(*pv)) {
3019 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003020 *pv = NULL;
3021 return;
3022 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003023 v = string_concat((PyStringObject *) *pv, w);
3024 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003025 *pv = v;
3026}
3027
Guido van Rossum013142a1994-08-30 08:19:36 +00003028void
Fred Drakeba096332000-07-09 07:04:36 +00003029PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003030{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003031 PyString_Concat(pv, w);
3032 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003033}
3034
3035
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003036/* The following function breaks the notion that strings are immutable:
3037 it changes the size of a string. We get away with this only if there
3038 is only one module referencing the object. You can also think of it
3039 as creating a new string object and destroying the old one, only
3040 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003041 already be known to some other part of the code...
3042 Note that if there's not enough memory to resize the string, the original
3043 string object at *pv is deallocated, *pv is set to NULL, an "out of
3044 memory" exception is set, and -1 is returned. Else (on success) 0 is
3045 returned, and the value in *pv may or may not be the same as on input.
3046 As always, an extra byte is allocated for a trailing \0 byte (newsize
3047 does *not* include that), and a trailing \0 byte is stored.
3048*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003049
3050int
Fred Drakeba096332000-07-09 07:04:36 +00003051_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003052{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003053 register PyObject *v;
3054 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003055 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003056 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003057 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003058 Py_DECREF(v);
3059 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003060 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003061 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003062 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003063 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003064 _Py_ForgetReference(v);
3065 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003066 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003067 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003068 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003069 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003070 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003071 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003072 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003073 _Py_NewReference(*pv);
3074 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003075 sv->ob_size = newsize;
3076 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003077 return 0;
3078}
Guido van Rossume5372401993-03-16 12:15:04 +00003079
3080/* Helpers for formatstring */
3081
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003082static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003083getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003084{
3085 int argidx = *p_argidx;
3086 if (argidx < arglen) {
3087 (*p_argidx)++;
3088 if (arglen < 0)
3089 return args;
3090 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003091 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003092 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003093 PyErr_SetString(PyExc_TypeError,
3094 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003095 return NULL;
3096}
3097
/* Bit flags recording which flag characters were seen in a format
 * specifier; they are OR-ed together into a single `flags' int.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
3110
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003111static int
Fred Drakeba096332000-07-09 07:04:36 +00003112formatfloat(char *buf, size_t buflen, int flags,
3113 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003114{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003115 /* fmt = '%#.' + `prec` + `type`
3116 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003117 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003118 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003119 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003120 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003121 if (prec < 0)
3122 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003123 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3124 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003125 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3126 (flags&F_ALT) ? "#" : "",
3127 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003128 /* worst case length calc to ensure no buffer overrun:
3129 fmt = %#.<prec>g
3130 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003131 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003132 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3133 If prec=0 the effective precision is 1 (the leading digit is
3134 always given), therefore increase by one to 10+prec. */
3135 if (buflen <= (size_t)10 + (size_t)prec) {
3136 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003137 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003138 return -1;
3139 }
Tim Peters885d4572001-11-28 20:27:42 +00003140 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003141 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003142}
3143
Tim Peters38fd5b62000-09-21 05:43:11 +00003144/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3145 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3146 * Python's regular ints.
3147 * Return value: a new PyString*, or NULL if error.
3148 * . *pbuf is set to point into it,
3149 * *plen set to the # of chars following that.
3150 * Caller must decref it when done using pbuf.
3151 * The string starting at *pbuf is of the form
3152 * "-"? ("0x" | "0X")? digit+
3153 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003154 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003155 * There will be at least prec digits, zero-filled on the left if
3156 * necessary to get that many.
3157 * val object to be converted
3158 * flags bitmask of format flags; only F_ALT is looked at
3159 * prec minimum number of digits; 0-fill on left if needed
3160 * type a character in [duoxX]; u acts the same as d
3161 *
3162 * CAUTION: o, x and X conversions on regular ints can never
3163 * produce a '-' sign, but can for Python's unbounded ints.
3164 */
3165PyObject*
3166_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3167 char **pbuf, int *plen)
3168{
3169 PyObject *result = NULL;
3170 char *buf;
3171 int i;
3172 int sign; /* 1 if '-', else 0 */
3173 int len; /* number of characters */
3174 int numdigits; /* len == numnondigits + numdigits */
3175 int numnondigits = 0;
3176
3177 switch (type) {
3178 case 'd':
3179 case 'u':
3180 result = val->ob_type->tp_str(val);
3181 break;
3182 case 'o':
3183 result = val->ob_type->tp_as_number->nb_oct(val);
3184 break;
3185 case 'x':
3186 case 'X':
3187 numnondigits = 2;
3188 result = val->ob_type->tp_as_number->nb_hex(val);
3189 break;
3190 default:
3191 assert(!"'type' not in [duoxX]");
3192 }
3193 if (!result)
3194 return NULL;
3195
3196 /* To modify the string in-place, there can only be one reference. */
3197 if (result->ob_refcnt != 1) {
3198 PyErr_BadInternalCall();
3199 return NULL;
3200 }
3201 buf = PyString_AsString(result);
3202 len = PyString_Size(result);
3203 if (buf[len-1] == 'L') {
3204 --len;
3205 buf[len] = '\0';
3206 }
3207 sign = buf[0] == '-';
3208 numnondigits += sign;
3209 numdigits = len - numnondigits;
3210 assert(numdigits > 0);
3211
Tim Petersfff53252001-04-12 18:38:48 +00003212 /* Get rid of base marker unless F_ALT */
3213 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003214 /* Need to skip 0x, 0X or 0. */
3215 int skipped = 0;
3216 switch (type) {
3217 case 'o':
3218 assert(buf[sign] == '0');
3219 /* If 0 is only digit, leave it alone. */
3220 if (numdigits > 1) {
3221 skipped = 1;
3222 --numdigits;
3223 }
3224 break;
3225 case 'x':
3226 case 'X':
3227 assert(buf[sign] == '0');
3228 assert(buf[sign + 1] == 'x');
3229 skipped = 2;
3230 numnondigits -= 2;
3231 break;
3232 }
3233 if (skipped) {
3234 buf += skipped;
3235 len -= skipped;
3236 if (sign)
3237 buf[0] = '-';
3238 }
3239 assert(len == numnondigits + numdigits);
3240 assert(numdigits > 0);
3241 }
3242
3243 /* Fill with leading zeroes to meet minimum width. */
3244 if (prec > numdigits) {
3245 PyObject *r1 = PyString_FromStringAndSize(NULL,
3246 numnondigits + prec);
3247 char *b1;
3248 if (!r1) {
3249 Py_DECREF(result);
3250 return NULL;
3251 }
3252 b1 = PyString_AS_STRING(r1);
3253 for (i = 0; i < numnondigits; ++i)
3254 *b1++ = *buf++;
3255 for (i = 0; i < prec - numdigits; i++)
3256 *b1++ = '0';
3257 for (i = 0; i < numdigits; i++)
3258 *b1++ = *buf++;
3259 *b1 = '\0';
3260 Py_DECREF(result);
3261 result = r1;
3262 buf = PyString_AS_STRING(result);
3263 len = numnondigits + prec;
3264 }
3265
3266 /* Fix up case for hex conversions. */
3267 switch (type) {
3268 case 'x':
3269 /* Need to convert all upper case letters to lower case. */
3270 for (i = 0; i < len; i++)
3271 if (buf[i] >= 'A' && buf[i] <= 'F')
3272 buf[i] += 'a'-'A';
3273 break;
3274 case 'X':
3275 /* Need to convert 0x to 0X (and -0x to -0X). */
3276 if (buf[sign + 1] == 'x')
3277 buf[sign + 1] = 'X';
3278 break;
3279 }
3280 *pbuf = buf;
3281 *plen = len;
3282 return result;
3283}
3284
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003285static int
Fred Drakeba096332000-07-09 07:04:36 +00003286formatint(char *buf, size_t buflen, int flags,
3287 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003288{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003289 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003290 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3291 + 1 + 1 = 24 */
3292 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003293 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003294
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003295 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003296 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003297 if (prec < 0)
3298 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003299
3300 if ((flags & F_ALT) &&
3301 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003302 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003303 * of issues that cause pain:
3304 * - when 0 is being converted, the C standard leaves off
3305 * the '0x' or '0X', which is inconsistent with other
3306 * %#x/%#X conversions and inconsistent with Python's
3307 * hex() function
3308 * - there are platforms that violate the standard and
3309 * convert 0 with the '0x' or '0X'
3310 * (Metrowerks, Compaq Tru64)
3311 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003312 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003313 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003314 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003315 * We can achieve the desired consistency by inserting our
3316 * own '0x' or '0X' prefix, and substituting %x/%X in place
3317 * of %#x/%#X.
3318 *
3319 * Note that this is the same approach as used in
3320 * formatint() in unicodeobject.c
3321 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003322 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003323 type, prec, type);
3324 }
3325 else {
3326 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003327 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003328 prec, type);
3329 }
3330
Tim Peters38fd5b62000-09-21 05:43:11 +00003331 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003332 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3333 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003334 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003335 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003336 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003337 return -1;
3338 }
Tim Peters885d4572001-11-28 20:27:42 +00003339 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003340 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003341}
3342
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003343static int
Fred Drakeba096332000-07-09 07:04:36 +00003344formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003345{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003346 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003347 if (PyString_Check(v)) {
3348 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003349 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003350 }
3351 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003352 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003353 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003354 }
3355 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003356 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003357}
3358
Guido van Rossum013142a1994-08-30 08:19:36 +00003359
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever the macro is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003369
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003370PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003371PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003372{
3373 char *fmt, *res;
3374 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003375 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003376 PyObject *result, *orig_args;
3377#ifdef Py_USING_UNICODE
3378 PyObject *v, *w;
3379#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003380 PyObject *dict = NULL;
3381 if (format == NULL || !PyString_Check(format) || args == NULL) {
3382 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003383 return NULL;
3384 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003385 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003386 fmt = PyString_AS_STRING(format);
3387 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003388 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003389 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003390 if (result == NULL)
3391 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003392 res = PyString_AsString(result);
3393 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003394 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003395 argidx = 0;
3396 }
3397 else {
3398 arglen = -1;
3399 argidx = -2;
3400 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003401 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003402 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003403 while (--fmtcnt >= 0) {
3404 if (*fmt != '%') {
3405 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003406 rescnt = fmtcnt + 100;
3407 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003408 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003409 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003410 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003411 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003412 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003413 }
3414 *res++ = *fmt++;
3415 }
3416 else {
3417 /* Got a format specifier */
3418 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003419 int width = -1;
3420 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003421 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003422 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003423 PyObject *v = NULL;
3424 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003425 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003426 int sign;
3427 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003428 char formatbuf[FORMATBUFLEN];
3429 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003430#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003431 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003432 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003433#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003434
Guido van Rossumda9c2711996-12-05 21:58:58 +00003435 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003436 if (*fmt == '(') {
3437 char *keystart;
3438 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003439 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003440 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003441
3442 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003443 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003444 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003445 goto error;
3446 }
3447 ++fmt;
3448 --fmtcnt;
3449 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003450 /* Skip over balanced parentheses */
3451 while (pcount > 0 && --fmtcnt >= 0) {
3452 if (*fmt == ')')
3453 --pcount;
3454 else if (*fmt == '(')
3455 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003456 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003457 }
3458 keylen = fmt - keystart - 1;
3459 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003460 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003461 "incomplete format key");
3462 goto error;
3463 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003464 key = PyString_FromStringAndSize(keystart,
3465 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003466 if (key == NULL)
3467 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003468 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003469 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003470 args_owned = 0;
3471 }
3472 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003473 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003474 if (args == NULL) {
3475 goto error;
3476 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003477 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003478 arglen = -1;
3479 argidx = -2;
3480 }
Guido van Rossume5372401993-03-16 12:15:04 +00003481 while (--fmtcnt >= 0) {
3482 switch (c = *fmt++) {
3483 case '-': flags |= F_LJUST; continue;
3484 case '+': flags |= F_SIGN; continue;
3485 case ' ': flags |= F_BLANK; continue;
3486 case '#': flags |= F_ALT; continue;
3487 case '0': flags |= F_ZERO; continue;
3488 }
3489 break;
3490 }
3491 if (c == '*') {
3492 v = getnextarg(args, arglen, &argidx);
3493 if (v == NULL)
3494 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003495 if (!PyInt_Check(v)) {
3496 PyErr_SetString(PyExc_TypeError,
3497 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003498 goto error;
3499 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003500 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003501 if (width < 0) {
3502 flags |= F_LJUST;
3503 width = -width;
3504 }
Guido van Rossume5372401993-03-16 12:15:04 +00003505 if (--fmtcnt >= 0)
3506 c = *fmt++;
3507 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003508 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003509 width = c - '0';
3510 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003511 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003512 if (!isdigit(c))
3513 break;
3514 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003515 PyErr_SetString(
3516 PyExc_ValueError,
3517 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003518 goto error;
3519 }
3520 width = width*10 + (c - '0');
3521 }
3522 }
3523 if (c == '.') {
3524 prec = 0;
3525 if (--fmtcnt >= 0)
3526 c = *fmt++;
3527 if (c == '*') {
3528 v = getnextarg(args, arglen, &argidx);
3529 if (v == NULL)
3530 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003531 if (!PyInt_Check(v)) {
3532 PyErr_SetString(
3533 PyExc_TypeError,
3534 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003535 goto error;
3536 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003537 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003538 if (prec < 0)
3539 prec = 0;
3540 if (--fmtcnt >= 0)
3541 c = *fmt++;
3542 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003543 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003544 prec = c - '0';
3545 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003546 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003547 if (!isdigit(c))
3548 break;
3549 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003550 PyErr_SetString(
3551 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003552 "prec too big");
3553 goto error;
3554 }
3555 prec = prec*10 + (c - '0');
3556 }
3557 }
3558 } /* prec */
3559 if (fmtcnt >= 0) {
3560 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003561 if (--fmtcnt >= 0)
3562 c = *fmt++;
3563 }
3564 }
3565 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003566 PyErr_SetString(PyExc_ValueError,
3567 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003568 goto error;
3569 }
3570 if (c != '%') {
3571 v = getnextarg(args, arglen, &argidx);
3572 if (v == NULL)
3573 goto error;
3574 }
3575 sign = 0;
3576 fill = ' ';
3577 switch (c) {
3578 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003579 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003580 len = 1;
3581 break;
3582 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003583 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003584#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003585 if (PyUnicode_Check(v)) {
3586 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003587 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003588 goto unicode;
3589 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003590#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003591 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003592 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003593 else
3594 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003595 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003596 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003597 if (!PyString_Check(temp)) {
3598 PyErr_SetString(PyExc_TypeError,
3599 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003600 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003601 goto error;
3602 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003603 pbuf = PyString_AS_STRING(temp);
3604 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003605 if (prec >= 0 && len > prec)
3606 len = prec;
3607 break;
3608 case 'i':
3609 case 'd':
3610 case 'u':
3611 case 'o':
3612 case 'x':
3613 case 'X':
3614 if (c == 'i')
3615 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003616 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003617 temp = _PyString_FormatLong(v, flags,
3618 prec, c, &pbuf, &len);
3619 if (!temp)
3620 goto error;
3621 /* unbounded ints can always produce
3622 a sign character! */
3623 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003624 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003625 else {
3626 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003627 len = formatint(pbuf,
3628 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003629 flags, prec, c, v);
3630 if (len < 0)
3631 goto error;
3632 /* only d conversion is signed */
3633 sign = c == 'd';
3634 }
3635 if (flags & F_ZERO)
3636 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003637 break;
3638 case 'e':
3639 case 'E':
3640 case 'f':
3641 case 'g':
3642 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003643 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003644 len = formatfloat(pbuf, sizeof(formatbuf),
3645 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003646 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003647 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003648 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003649 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003650 fill = '0';
3651 break;
3652 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003653 pbuf = formatbuf;
3654 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003655 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003656 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003657 break;
3658 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003659 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003660 "unsupported format character '%c' (0x%x) "
3661 "at index %i",
3662 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003663 goto error;
3664 }
3665 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003666 if (*pbuf == '-' || *pbuf == '+') {
3667 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003668 len--;
3669 }
3670 else if (flags & F_SIGN)
3671 sign = '+';
3672 else if (flags & F_BLANK)
3673 sign = ' ';
3674 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003675 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003676 }
3677 if (width < len)
3678 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003679 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003680 reslen -= rescnt;
3681 rescnt = width + fmtcnt + 100;
3682 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003683 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003684 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003685 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003686 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003687 }
3688 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003689 if (fill != ' ')
3690 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003691 rescnt--;
3692 if (width > len)
3693 width--;
3694 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003695 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3696 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003697 assert(pbuf[1] == c);
3698 if (fill != ' ') {
3699 *res++ = *pbuf++;
3700 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003701 }
Tim Petersfff53252001-04-12 18:38:48 +00003702 rescnt -= 2;
3703 width -= 2;
3704 if (width < 0)
3705 width = 0;
3706 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003707 }
3708 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003709 do {
3710 --rescnt;
3711 *res++ = fill;
3712 } while (--width > len);
3713 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003714 if (fill == ' ') {
3715 if (sign)
3716 *res++ = sign;
3717 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003718 (c == 'x' || c == 'X')) {
3719 assert(pbuf[0] == '0');
3720 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003721 *res++ = *pbuf++;
3722 *res++ = *pbuf++;
3723 }
3724 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003725 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003726 res += len;
3727 rescnt -= len;
3728 while (--width >= len) {
3729 --rescnt;
3730 *res++ = ' ';
3731 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003732 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003733 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003734 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003735 goto error;
3736 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003737 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003738 } /* '%' */
3739 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003740 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003741 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003742 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003743 goto error;
3744 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003745 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003746 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003747 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003748 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003749 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003750
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003751#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003752 unicode:
3753 if (args_owned) {
3754 Py_DECREF(args);
3755 args_owned = 0;
3756 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003757 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003758 if (PyTuple_Check(orig_args) && argidx > 0) {
3759 PyObject *v;
3760 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3761 v = PyTuple_New(n);
3762 if (v == NULL)
3763 goto error;
3764 while (--n >= 0) {
3765 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3766 Py_INCREF(w);
3767 PyTuple_SET_ITEM(v, n, w);
3768 }
3769 args = v;
3770 } else {
3771 Py_INCREF(orig_args);
3772 args = orig_args;
3773 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003774 args_owned = 1;
3775 /* Take what we have of the result and let the Unicode formatting
3776 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003777 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003778 if (_PyString_Resize(&result, rescnt))
3779 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003780 fmtcnt = PyString_GET_SIZE(format) - \
3781 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003782 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3783 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003784 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003785 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003786 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003787 if (v == NULL)
3788 goto error;
3789 /* Paste what we have (result) to what the Unicode formatting
3790 function returned (v) and return the result (or error) */
3791 w = PyUnicode_Concat(result, v);
3792 Py_DECREF(result);
3793 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003794 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003795 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003796#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003797
Guido van Rossume5372401993-03-16 12:15:04 +00003798 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003799 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003800 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003801 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003802 }
Guido van Rossume5372401993-03-16 12:15:04 +00003803 return NULL;
3804}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003805
3806
Guido van Rossum2a61e741997-01-18 07:55:05 +00003807
/* This dictionary itself is never released by PyString_Fini(), so it leaks
 * at that point.  That's acceptable because PyString_Fini() specifically
 * frees interned strings that are only referenced by this dictionary.  The
 * CVS log entry for revision 2.45 says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
/* Table of interned strings.  PyString_InternInPlace() stores each interned
   string as both key and value (PyDict_SetItem(interned, t, t)).  The
   pointer starts out NULL and the dict is created on first use by
   PyString_InternInPlace(). */
static PyObject *interned;
3821
3822void
Fred Drakeba096332000-07-09 07:04:36 +00003823PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003824{
3825 register PyStringObject *s = (PyStringObject *)(*p);
3826 PyObject *t;
3827 if (s == NULL || !PyString_Check(s))
3828 Py_FatalError("PyString_InternInPlace: strings only please!");
3829 if ((t = s->ob_sinterned) != NULL) {
3830 if (t == (PyObject *)s)
3831 return;
3832 Py_INCREF(t);
3833 *p = t;
3834 Py_DECREF(s);
3835 return;
3836 }
3837 if (interned == NULL) {
3838 interned = PyDict_New();
3839 if (interned == NULL)
3840 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003841 }
3842 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3843 Py_INCREF(t);
3844 *p = s->ob_sinterned = t;
3845 Py_DECREF(s);
3846 return;
3847 }
Tim Peters111f6092001-09-12 07:54:51 +00003848 /* Ensure that only true string objects appear in the intern dict,
3849 and as the value of ob_sinterned. */
3850 if (PyString_CheckExact(s)) {
3851 t = (PyObject *)s;
3852 if (PyDict_SetItem(interned, t, t) == 0) {
3853 s->ob_sinterned = t;
3854 return;
3855 }
3856 }
3857 else {
3858 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3859 PyString_GET_SIZE(s));
3860 if (t != NULL) {
3861 if (PyDict_SetItem(interned, t, t) == 0) {
3862 *p = s->ob_sinterned = t;
3863 Py_DECREF(s);
3864 return;
3865 }
3866 Py_DECREF(t);
3867 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003868 }
3869 PyErr_Clear();
3870}
3871
3872
3873PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003874PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003875{
3876 PyObject *s = PyString_FromString(cp);
3877 if (s == NULL)
3878 return NULL;
3879 PyString_InternInPlace(&s);
3880 return s;
3881}
3882
Guido van Rossum8cf04761997-08-02 02:57:45 +00003883void
Fred Drakeba096332000-07-09 07:04:36 +00003884PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003885{
3886 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003887 for (i = 0; i < UCHAR_MAX + 1; i++) {
3888 Py_XDECREF(characters[i]);
3889 characters[i] = NULL;
3890 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003891 Py_XDECREF(nullstring);
3892 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003893 if (interned) {
3894 int pos, changed;
3895 PyObject *key, *value;
3896 do {
3897 changed = 0;
3898 pos = 0;
3899 while (PyDict_Next(interned, &pos, &key, &value)) {
3900 if (key->ob_refcnt == 2 && key == value) {
3901 PyDict_DelItem(interned, key);
3902 changed = 1;
3903 }
3904 }
3905 } while (changed);
3906 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003907}
Barry Warsawa903ad982001-02-23 16:40:48 +00003908
Barry Warsawa903ad982001-02-23 16:40:48 +00003909void _Py_ReleaseInternedStrings(void)
3910{
3911 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003912 fprintf(stderr, "releasing interned strings\n");
3913 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003914 Py_DECREF(interned);
3915 interned = NULL;
3916 }
3917}