blob: 1d5277c0c78e3064b5a6b61c59037fad7a7bd441 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
18/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000019 For both PyString_FromString() and PyString_FromStringAndSize(), the
20 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000021 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000022
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000023 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000024 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000025
   For PyString_FromStringAndSize(), the parameter `str' is
27 either NULL or else points to a string containing at least `size' bytes.
28 For PyString_FromStringAndSize(), the string in the `str' parameter does
29 not have to be null-terminated. (Therefore it is safe to construct a
30 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
31 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
32 bytes (setting the last byte to the null terminating character) and you can
33 fill in the data yourself. If `str' is non-NULL then the resulting
34 PyString object must be treated as immutable and you must not fill in nor
35 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 The PyObject member `op->ob_size', which denotes the number of "extra
38 items" in a variable-size object, will contain the number of bytes
39 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
41 PyString_FromStringAndSize()) or the length of the string in the `str'
42 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000159#ifdef __va_copy
160 __va_copy(count, vargs);
161#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000162 count = vargs;
163#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000164#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000165 /* step 1: figure out how large a buffer we need */
166 for (f = format; *f; f++) {
167 if (*f == '%') {
168 const char* p = f;
169 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
170 ;
171
172 /* skip the 'l' in %ld, since it doesn't change the
173 width. although only %d is supported (see
174 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000175 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000176 if (*f == 'l' && *(f+1) == 'd')
177 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000178
Barry Warsawdadace02001-08-24 18:32:06 +0000179 switch (*f) {
180 case 'c':
181 (void)va_arg(count, int);
182 /* fall through... */
183 case '%':
184 n++;
185 break;
186 case 'd': case 'i': case 'x':
187 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000188 /* 20 bytes is enough to hold a 64-bit
189 integer. Decimal takes the most space.
190 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000191 n += 20;
192 break;
193 case 's':
194 s = va_arg(count, char*);
195 n += strlen(s);
196 break;
197 case 'p':
198 (void) va_arg(count, int);
199 /* maximum 64-bit pointer representation:
200 * 0xffffffffffffffff
201 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000202 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000203 */
204 n += 19;
205 break;
206 default:
207 /* if we stumble upon an unknown
208 formatting code, copy the rest of
209 the format string to the output
210 string. (we cannot just skip the
211 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000212 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000213 n += strlen(p);
214 goto expand;
215 }
216 } else
217 n++;
218 }
219 expand:
220 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000221 /* Since we've analyzed how much space we need for the worst case,
222 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000223 string = PyString_FromStringAndSize(NULL, n);
224 if (!string)
225 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000226
Barry Warsawdadace02001-08-24 18:32:06 +0000227 s = PyString_AsString(string);
228
229 for (f = format; *f; f++) {
230 if (*f == '%') {
231 const char* p = f++;
232 int i, longflag = 0;
233 /* parse the width.precision part (we're only
234 interested in the precision value, if any) */
235 n = 0;
236 while (isdigit(Py_CHARMASK(*f)))
237 n = (n*10) + *f++ - '0';
238 if (*f == '.') {
239 f++;
240 n = 0;
241 while (isdigit(Py_CHARMASK(*f)))
242 n = (n*10) + *f++ - '0';
243 }
244 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
245 f++;
246 /* handle the long flag, but only for %ld. others
247 can be added when necessary. */
248 if (*f == 'l' && *(f+1) == 'd') {
249 longflag = 1;
250 ++f;
251 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000252
Barry Warsawdadace02001-08-24 18:32:06 +0000253 switch (*f) {
254 case 'c':
255 *s++ = va_arg(vargs, int);
256 break;
257 case 'd':
258 if (longflag)
259 sprintf(s, "%ld", va_arg(vargs, long));
260 else
261 sprintf(s, "%d", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'i':
265 sprintf(s, "%i", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 'x':
269 sprintf(s, "%x", va_arg(vargs, int));
270 s += strlen(s);
271 break;
272 case 's':
273 p = va_arg(vargs, char*);
274 i = strlen(p);
275 if (n > 0 && i > n)
276 i = n;
277 memcpy(s, p, i);
278 s += i;
279 break;
280 case 'p':
281 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000282 /* %p is ill-defined: ensure leading 0x. */
283 if (s[1] == 'X')
284 s[1] = 'x';
285 else if (s[1] != 'x') {
286 memmove(s+2, s, strlen(s)+1);
287 s[0] = '0';
288 s[1] = 'x';
289 }
Barry Warsawdadace02001-08-24 18:32:06 +0000290 s += strlen(s);
291 break;
292 case '%':
293 *s++ = '%';
294 break;
295 default:
296 strcpy(s, p);
297 s += strlen(s);
298 goto end;
299 }
300 } else
301 *s++ = *f;
302 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000303
Barry Warsawdadace02001-08-24 18:32:06 +0000304 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000305 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000306 return string;
307}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000308
Barry Warsawdadace02001-08-24 18:32:06 +0000309PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000310PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000311{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000312 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000313 va_list vargs;
314
315#ifdef HAVE_STDARG_PROTOTYPES
316 va_start(vargs, format);
317#else
318 va_start(vargs);
319#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000320 ret = PyString_FromFormatV(format, vargs);
321 va_end(vargs);
322 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000323}
324
325
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000326PyObject *PyString_Decode(const char *s,
327 int size,
328 const char *encoding,
329 const char *errors)
330{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000331 PyObject *v, *str;
332
333 str = PyString_FromStringAndSize(s, size);
334 if (str == NULL)
335 return NULL;
336 v = PyString_AsDecodedString(str, encoding, errors);
337 Py_DECREF(str);
338 return v;
339}
340
341PyObject *PyString_AsDecodedObject(PyObject *str,
342 const char *encoding,
343 const char *errors)
344{
345 PyObject *v;
346
347 if (!PyString_Check(str)) {
348 PyErr_BadArgument();
349 goto onError;
350 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000351
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000352 if (encoding == NULL) {
353#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000355#else
356 PyErr_SetString(PyExc_ValueError, "no encoding specified");
357 goto onError;
358#endif
359 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360
361 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000362 v = PyCodec_Decode(str, encoding, errors);
363 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365
366 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000367
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000368 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369 return NULL;
370}
371
372PyObject *PyString_AsDecodedString(PyObject *str,
373 const char *encoding,
374 const char *errors)
375{
376 PyObject *v;
377
378 v = PyString_AsDecodedObject(str, encoding, errors);
379 if (v == NULL)
380 goto onError;
381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000383 /* Convert Unicode to a string using the default encoding */
384 if (PyUnicode_Check(v)) {
385 PyObject *temp = v;
386 v = PyUnicode_AsEncodedString(v, NULL, NULL);
387 Py_DECREF(temp);
388 if (v == NULL)
389 goto onError;
390 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000391#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 if (!PyString_Check(v)) {
393 PyErr_Format(PyExc_TypeError,
394 "decoder did not return a string object (type=%.400s)",
395 v->ob_type->tp_name);
396 Py_DECREF(v);
397 goto onError;
398 }
399
400 return v;
401
402 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 return NULL;
404}
405
406PyObject *PyString_Encode(const char *s,
407 int size,
408 const char *encoding,
409 const char *errors)
410{
411 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000412
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000413 str = PyString_FromStringAndSize(s, size);
414 if (str == NULL)
415 return NULL;
416 v = PyString_AsEncodedString(str, encoding, errors);
417 Py_DECREF(str);
418 return v;
419}
420
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000422 const char *encoding,
423 const char *errors)
424{
425 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000426
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 if (!PyString_Check(str)) {
428 PyErr_BadArgument();
429 goto onError;
430 }
431
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000432 if (encoding == NULL) {
433#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000435#else
436 PyErr_SetString(PyExc_ValueError, "no encoding specified");
437 goto onError;
438#endif
439 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000440
441 /* Encode via the codec registry */
442 v = PyCodec_Encode(str, encoding, errors);
443 if (v == NULL)
444 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000445
446 return v;
447
448 onError:
449 return NULL;
450}
451
452PyObject *PyString_AsEncodedString(PyObject *str,
453 const char *encoding,
454 const char *errors)
455{
456 PyObject *v;
457
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000458 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000459 if (v == NULL)
460 goto onError;
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 /* Convert Unicode to a string using the default encoding */
464 if (PyUnicode_Check(v)) {
465 PyObject *temp = v;
466 v = PyUnicode_AsEncodedString(v, NULL, NULL);
467 Py_DECREF(temp);
468 if (v == NULL)
469 goto onError;
470 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000471#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 if (!PyString_Check(v)) {
473 PyErr_Format(PyExc_TypeError,
474 "encoder did not return a string object (type=%.400s)",
475 v->ob_type->tp_name);
476 Py_DECREF(v);
477 goto onError;
478 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000479
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000480 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000481
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000482 onError:
483 return NULL;
484}
485
Guido van Rossum234f9421993-06-17 12:35:49 +0000486static void
Fred Drakeba096332000-07-09 07:04:36 +0000487string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000488{
Guido van Rossum9475a232001-10-05 20:51:39 +0000489 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000490}
491
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000492static int
493string_getsize(register PyObject *op)
494{
495 char *s;
496 int len;
497 if (PyString_AsStringAndSize(op, &s, &len))
498 return -1;
499 return len;
500}
501
502static /*const*/ char *
503string_getbuffer(register PyObject *op)
504{
505 char *s;
506 int len;
507 if (PyString_AsStringAndSize(op, &s, &len))
508 return NULL;
509 return s;
510}
511
Guido van Rossumd7047b31995-01-02 19:07:15 +0000512int
Fred Drakeba096332000-07-09 07:04:36 +0000513PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000515 if (!PyString_Check(op))
516 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000517 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518}
519
520/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000521PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000523 if (!PyString_Check(op))
524 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000525 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000526}
527
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000528int
529PyString_AsStringAndSize(register PyObject *obj,
530 register char **s,
531 register int *len)
532{
533 if (s == NULL) {
534 PyErr_BadInternalCall();
535 return -1;
536 }
537
538 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000539#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000540 if (PyUnicode_Check(obj)) {
541 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
542 if (obj == NULL)
543 return -1;
544 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000545 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000546#endif
547 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000548 PyErr_Format(PyExc_TypeError,
549 "expected string or Unicode object, "
550 "%.200s found", obj->ob_type->tp_name);
551 return -1;
552 }
553 }
554
555 *s = PyString_AS_STRING(obj);
556 if (len != NULL)
557 *len = PyString_GET_SIZE(obj);
558 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
559 PyErr_SetString(PyExc_TypeError,
560 "expected string without null bytes");
561 return -1;
562 }
563 return 0;
564}
565
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566/* Methods */
567
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000568static int
Fred Drakeba096332000-07-09 07:04:36 +0000569string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000570{
571 int i;
572 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000573 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000574
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000575 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000576 if (! PyString_CheckExact(op)) {
577 int ret;
578 /* A str subclass may have its own __str__ method. */
579 op = (PyStringObject *) PyObject_Str((PyObject *)op);
580 if (op == NULL)
581 return -1;
582 ret = string_print(op, fp, flags);
583 Py_DECREF(op);
584 return ret;
585 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000587 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000588 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000589 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000590
Thomas Wouters7e474022000-07-16 12:04:32 +0000591 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000592 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000593 if (memchr(op->ob_sval, '\'', op->ob_size) &&
594 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000595 quote = '"';
596
597 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000598 for (i = 0; i < op->ob_size; i++) {
599 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000600 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000601 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000602 else if (c == '\t')
603 fprintf(fp, "\\t");
604 else if (c == '\n')
605 fprintf(fp, "\\n");
606 else if (c == '\r')
607 fprintf(fp, "\\r");
608 else if (c < ' ' || c >= 0x7f)
609 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000610 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000611 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000612 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000613 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000614 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615}
616
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000617static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000618string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000619{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000620 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
621 PyObject *v;
622 if (newsize > INT_MAX) {
623 PyErr_SetString(PyExc_OverflowError,
624 "string is too large to make repr");
625 }
626 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000627 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000628 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000629 }
630 else {
631 register int i;
632 register char c;
633 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000634 int quote;
635
Thomas Wouters7e474022000-07-16 12:04:32 +0000636 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000637 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000638 if (memchr(op->ob_sval, '\'', op->ob_size) &&
639 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000640 quote = '"';
641
Tim Peters9161c8b2001-12-03 01:55:38 +0000642 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000643 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000645 /* There's at least enough room for a hex escape
646 and a closing quote. */
647 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000648 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000649 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000651 else if (c == '\t')
652 *p++ = '\\', *p++ = 't';
653 else if (c == '\n')
654 *p++ = '\\', *p++ = 'n';
655 else if (c == '\r')
656 *p++ = '\\', *p++ = 'r';
657 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000658 /* For performance, we don't want to call
659 PyOS_snprintf here (extra layers of
660 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000661 sprintf(p, "\\x%02x", c & 0xff);
662 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663 }
664 else
665 *p++ = c;
666 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000667 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000668 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000670 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000671 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000672 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000673 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000674}
675
Guido van Rossum189f1df2001-05-01 16:51:53 +0000676static PyObject *
677string_str(PyObject *s)
678{
Tim Petersc9933152001-10-16 20:18:24 +0000679 assert(PyString_Check(s));
680 if (PyString_CheckExact(s)) {
681 Py_INCREF(s);
682 return s;
683 }
684 else {
685 /* Subtype -- return genuine string with the same value. */
686 PyStringObject *t = (PyStringObject *) s;
687 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
688 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000689}
690
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000691static int
Fred Drakeba096332000-07-09 07:04:36 +0000692string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693{
694 return a->ob_size;
695}
696
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000698string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699{
700 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000701 register PyStringObject *op;
702 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000703#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000704 if (PyUnicode_Check(bb))
705 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000706#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000707 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000708 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000709 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000710 return NULL;
711 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000712#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000713 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000714 if ((a->ob_size == 0 || b->ob_size == 0) &&
715 PyString_CheckExact(a) && PyString_CheckExact(b)) {
716 if (a->ob_size == 0) {
717 Py_INCREF(bb);
718 return bb;
719 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 Py_INCREF(a);
721 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722 }
723 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000724 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000726 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000727 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000729 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000730 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000731 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000732 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
733 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
734 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736#undef b
737}
738
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000739static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000740string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000741{
742 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000743 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000744 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000745 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000746 if (n < 0)
747 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000748 /* watch out for overflows: the size can overflow int,
749 * and the # of bytes needed can overflow size_t
750 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000751 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000752 if (n && size / n != a->ob_size) {
753 PyErr_SetString(PyExc_OverflowError,
754 "repeated string is too long");
755 return NULL;
756 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000757 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000758 Py_INCREF(a);
759 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000760 }
Tim Peters8f422462000-09-09 06:13:41 +0000761 nbytes = size * sizeof(char);
762 if (nbytes / sizeof(char) != (size_t)size ||
763 nbytes + sizeof(PyStringObject) <= nbytes) {
764 PyErr_SetString(PyExc_OverflowError,
765 "repeated string is too long");
766 return NULL;
767 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000768 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000769 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000770 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000771 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000772 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000773 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000774 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000775 for (i = 0; i < size; i += a->ob_size)
776 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
777 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000778 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779}
780
781/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
782
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000783static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000784string_slice(register PyStringObject *a, register int i, register int j)
785 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000786{
787 if (i < 0)
788 i = 0;
789 if (j < 0)
790 j = 0; /* Avoid signed/unsigned bug in next line */
791 if (j > a->ob_size)
792 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000793 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
794 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000795 Py_INCREF(a);
796 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797 }
798 if (j < i)
799 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000800 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000801}
802
Guido van Rossum9284a572000-03-07 15:53:43 +0000803static int
Fred Drakeba096332000-07-09 07:04:36 +0000804string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000805{
Barry Warsaw817918c2002-08-06 16:58:21 +0000806 const char *lhs, *rhs, *end;
807 int size;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000808#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000809 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000810 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000811#endif
Barry Warsaw817918c2002-08-06 16:58:21 +0000812 if (!PyString_Check(el)) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000813 PyErr_SetString(PyExc_TypeError,
Barry Warsaw817918c2002-08-06 16:58:21 +0000814 "'in <string>' requires string as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000815 return -1;
816 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000817 size = PyString_Size(el);
818 rhs = PyString_AS_STRING(el);
819 lhs = PyString_AS_STRING(a);
820
821 /* optimize for a single character */
822 if (size == 1)
823 return memchr(lhs, *rhs, PyString_Size(a)) != NULL;
824
825 end = lhs + (PyString_Size(a) - size);
826 while (lhs <= end) {
827 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +0000828 return 1;
829 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000830
Guido van Rossum9284a572000-03-07 15:53:43 +0000831 return 0;
832}
833
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000834static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000835string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000837 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000838 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000839 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000840 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841 return NULL;
842 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000843 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000844 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000845 if (v == NULL)
846 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000847 else {
848#ifdef COUNT_ALLOCS
849 one_strings++;
850#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000851 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000852 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000853 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854}
855
Martin v. Löwiscd353062001-05-24 16:56:35 +0000856static PyObject*
857string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000858{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000859 int c;
860 int len_a, len_b;
861 int min_len;
862 PyObject *result;
863
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000864 /* Make sure both arguments are strings. */
865 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000866 result = Py_NotImplemented;
867 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000868 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000869 if (a == b) {
870 switch (op) {
871 case Py_EQ:case Py_LE:case Py_GE:
872 result = Py_True;
873 goto out;
874 case Py_NE:case Py_LT:case Py_GT:
875 result = Py_False;
876 goto out;
877 }
878 }
879 if (op == Py_EQ) {
880 /* Supporting Py_NE here as well does not save
881 much time, since Py_NE is rarely used. */
882 if (a->ob_size == b->ob_size
883 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000884 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +0000885 a->ob_size) == 0)) {
886 result = Py_True;
887 } else {
888 result = Py_False;
889 }
890 goto out;
891 }
892 len_a = a->ob_size; len_b = b->ob_size;
893 min_len = (len_a < len_b) ? len_a : len_b;
894 if (min_len > 0) {
895 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
896 if (c==0)
897 c = memcmp(a->ob_sval, b->ob_sval, min_len);
898 }else
899 c = 0;
900 if (c == 0)
901 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
902 switch (op) {
903 case Py_LT: c = c < 0; break;
904 case Py_LE: c = c <= 0; break;
905 case Py_EQ: assert(0); break; /* unreachable */
906 case Py_NE: c = c != 0; break;
907 case Py_GT: c = c > 0; break;
908 case Py_GE: c = c >= 0; break;
909 default:
910 result = Py_NotImplemented;
911 goto out;
912 }
913 result = c ? Py_True : Py_False;
914 out:
915 Py_INCREF(result);
916 return result;
917}
918
919int
920_PyString_Eq(PyObject *o1, PyObject *o2)
921{
922 PyStringObject *a, *b;
923 a = (PyStringObject*)o1;
924 b = (PyStringObject*)o2;
925 return a->ob_size == b->ob_size
926 && *a->ob_sval == *b->ob_sval
927 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000928}
929
Guido van Rossum9bfef441993-03-29 10:43:31 +0000930static long
Fred Drakeba096332000-07-09 07:04:36 +0000931string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000932{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000933 register int len;
934 register unsigned char *p;
935 register long x;
936
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000937 if (a->ob_shash != -1)
938 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000939 if (a->ob_sinterned != NULL)
940 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000942 len = a->ob_size;
943 p = (unsigned char *) a->ob_sval;
944 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000945 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000946 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000947 x ^= a->ob_size;
948 if (x == -1)
949 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000950 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000951 return x;
952}
953
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000954static PyObject*
955string_subscript(PyStringObject* self, PyObject* item)
956{
957 if (PyInt_Check(item)) {
958 long i = PyInt_AS_LONG(item);
959 if (i < 0)
960 i += PyString_GET_SIZE(self);
961 return string_item(self,i);
962 }
963 else if (PyLong_Check(item)) {
964 long i = PyLong_AsLong(item);
965 if (i == -1 && PyErr_Occurred())
966 return NULL;
967 if (i < 0)
968 i += PyString_GET_SIZE(self);
969 return string_item(self,i);
970 }
971 else if (PySlice_Check(item)) {
972 int start, stop, step, slicelength, cur, i;
973 char* source_buf;
974 char* result_buf;
975 PyObject* result;
976
977 if (PySlice_GetIndicesEx((PySliceObject*)item,
978 PyString_GET_SIZE(self),
979 &start, &stop, &step, &slicelength) < 0) {
980 return NULL;
981 }
982
983 if (slicelength <= 0) {
984 return PyString_FromStringAndSize("", 0);
985 }
986 else {
987 source_buf = PyString_AsString((PyObject*)self);
988 result_buf = PyMem_Malloc(slicelength);
989
990 for (cur = start, i = 0; i < slicelength;
991 cur += step, i++) {
992 result_buf[i] = source_buf[cur];
993 }
994
995 result = PyString_FromStringAndSize(result_buf,
996 slicelength);
997 PyMem_Free(result_buf);
998 return result;
999 }
1000 }
1001 else {
1002 PyErr_SetString(PyExc_TypeError,
1003 "string indices must be integers");
1004 return NULL;
1005 }
1006}
1007
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001008static int
Fred Drakeba096332000-07-09 07:04:36 +00001009string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001010{
1011 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001012 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001013 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001014 return -1;
1015 }
1016 *ptr = (void *)self->ob_sval;
1017 return self->ob_size;
1018}
1019
1020static int
Fred Drakeba096332000-07-09 07:04:36 +00001021string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001022{
Guido van Rossum045e6881997-09-08 18:30:11 +00001023 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001024 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001025 return -1;
1026}
1027
1028static int
Fred Drakeba096332000-07-09 07:04:36 +00001029string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001030{
1031 if ( lenp )
1032 *lenp = self->ob_size;
1033 return 1;
1034}
1035
Guido van Rossum1db70701998-10-08 02:18:52 +00001036static int
Fred Drakeba096332000-07-09 07:04:36 +00001037string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001038{
1039 if ( index != 0 ) {
1040 PyErr_SetString(PyExc_SystemError,
1041 "accessing non-existent string segment");
1042 return -1;
1043 }
1044 *ptr = self->ob_sval;
1045 return self->ob_size;
1046}
1047
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001048static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001049 (inquiry)string_length, /*sq_length*/
1050 (binaryfunc)string_concat, /*sq_concat*/
1051 (intargfunc)string_repeat, /*sq_repeat*/
1052 (intargfunc)string_item, /*sq_item*/
1053 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001054 0, /*sq_ass_item*/
1055 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001056 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001057};
1058
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001059static PyMappingMethods string_as_mapping = {
1060 (inquiry)string_length,
1061 (binaryfunc)string_subscript,
1062 0,
1063};
1064
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001065static PyBufferProcs string_as_buffer = {
1066 (getreadbufferproc)string_buffer_getreadbuf,
1067 (getwritebufferproc)string_buffer_getwritebuf,
1068 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001069 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001070};
1071
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001072
1073
/* Which end(s) of the string a strip operation works on. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* PyArg_ParseTuple format strings, indexed by the constants above. */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* Method name for a strip type: the format string minus its "|O:" prefix. */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001082
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001083
1084static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001085split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088 PyObject* item;
1089 PyObject *list = PyList_New(0);
1090
1091 if (list == NULL)
1092 return NULL;
1093
Guido van Rossum4c08d552000-03-10 22:55:18 +00001094 for (i = j = 0; i < len; ) {
1095 while (i < len && isspace(Py_CHARMASK(s[i])))
1096 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001097 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001098 while (i < len && !isspace(Py_CHARMASK(s[i])))
1099 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001100 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001101 if (maxsplit-- <= 0)
1102 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001103 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1104 if (item == NULL)
1105 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001106 err = PyList_Append(list, item);
1107 Py_DECREF(item);
1108 if (err < 0)
1109 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001110 while (i < len && isspace(Py_CHARMASK(s[i])))
1111 i++;
1112 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113 }
1114 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001115 if (j < len) {
1116 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1117 if (item == NULL)
1118 goto finally;
1119 err = PyList_Append(list, item);
1120 Py_DECREF(item);
1121 if (err < 0)
1122 goto finally;
1123 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001124 return list;
1125 finally:
1126 Py_DECREF(list);
1127 return NULL;
1128}
1129
1130
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001131PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001132"S.split([sep [,maxsplit]]) -> list of strings\n\
1133\n\
1134Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001135delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001136splits are done. If sep is not specified or is None, any\n\
1137whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138
1139static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001140string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141{
1142 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001143 int maxsplit = -1;
1144 const char *s = PyString_AS_STRING(self), *sub;
1145 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146
Guido van Rossum4c08d552000-03-10 22:55:18 +00001147 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001148 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001149 if (maxsplit < 0)
1150 maxsplit = INT_MAX;
1151 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001153 if (PyString_Check(subobj)) {
1154 sub = PyString_AS_STRING(subobj);
1155 n = PyString_GET_SIZE(subobj);
1156 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001157#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001158 else if (PyUnicode_Check(subobj))
1159 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001160#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001161 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1162 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001163 if (n == 0) {
1164 PyErr_SetString(PyExc_ValueError, "empty separator");
1165 return NULL;
1166 }
1167
1168 list = PyList_New(0);
1169 if (list == NULL)
1170 return NULL;
1171
1172 i = j = 0;
1173 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001174 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001175 if (maxsplit-- <= 0)
1176 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001177 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1178 if (item == NULL)
1179 goto fail;
1180 err = PyList_Append(list, item);
1181 Py_DECREF(item);
1182 if (err < 0)
1183 goto fail;
1184 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001185 }
1186 else
1187 i++;
1188 }
1189 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1190 if (item == NULL)
1191 goto fail;
1192 err = PyList_Append(list, item);
1193 Py_DECREF(item);
1194 if (err < 0)
1195 goto fail;
1196
1197 return list;
1198
1199 fail:
1200 Py_DECREF(list);
1201 return NULL;
1202}
1203
1204
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001205PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001206"S.join(sequence) -> string\n\
1207\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001208Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001209sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001210
1211static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001212string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213{
1214 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001215 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001216 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001217 char *p;
1218 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001219 size_t sz = 0;
1220 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001221 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001222
Tim Peters19fe14e2001-01-19 03:03:47 +00001223 seq = PySequence_Fast(orig, "");
1224 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001225 if (PyErr_ExceptionMatches(PyExc_TypeError))
1226 PyErr_Format(PyExc_TypeError,
1227 "sequence expected, %.80s found",
1228 orig->ob_type->tp_name);
1229 return NULL;
1230 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001231
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001232 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001233 if (seqlen == 0) {
1234 Py_DECREF(seq);
1235 return PyString_FromString("");
1236 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001237 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001238 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001239 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1240 PyErr_Format(PyExc_TypeError,
1241 "sequence item 0: expected string,"
1242 " %.80s found",
1243 item->ob_type->tp_name);
1244 Py_DECREF(seq);
1245 return NULL;
1246 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001247 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001248 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001249 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001250 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001251
Tim Peters19fe14e2001-01-19 03:03:47 +00001252 /* There are at least two things to join. Do a pre-pass to figure out
1253 * the total amount of space we'll need (sz), see whether any argument
1254 * is absurd, and defer to the Unicode join if appropriate.
1255 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001256 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001257 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001258 item = PySequence_Fast_GET_ITEM(seq, i);
1259 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001260#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001261 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001262 /* Defer to Unicode join.
1263 * CAUTION: There's no gurantee that the
1264 * original sequence can be iterated over
1265 * again, so we must pass seq here.
1266 */
1267 PyObject *result;
1268 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001269 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001270 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001271 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001272#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001273 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001274 "sequence item %i: expected string,"
1275 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001276 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001277 Py_DECREF(seq);
1278 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001279 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001280 sz += PyString_GET_SIZE(item);
1281 if (i != 0)
1282 sz += seplen;
1283 if (sz < old_sz || sz > INT_MAX) {
1284 PyErr_SetString(PyExc_OverflowError,
1285 "join() is too long for a Python string");
1286 Py_DECREF(seq);
1287 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001289 }
1290
1291 /* Allocate result space. */
1292 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1293 if (res == NULL) {
1294 Py_DECREF(seq);
1295 return NULL;
1296 }
1297
1298 /* Catenate everything. */
1299 p = PyString_AS_STRING(res);
1300 for (i = 0; i < seqlen; ++i) {
1301 size_t n;
1302 item = PySequence_Fast_GET_ITEM(seq, i);
1303 n = PyString_GET_SIZE(item);
1304 memcpy(p, PyString_AS_STRING(item), n);
1305 p += n;
1306 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001307 memcpy(p, sep, seplen);
1308 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001309 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001311
Jeremy Hylton49048292000-07-11 03:28:17 +00001312 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001313 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314}
1315
Tim Peters52e155e2001-06-16 05:42:57 +00001316PyObject *
1317_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001318{
Tim Petersa7259592001-06-16 05:11:17 +00001319 assert(sep != NULL && PyString_Check(sep));
1320 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001321 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001322}
1323
/* Normalise a (start, end) pair in place for a sequence of length `len'.
 *
 * `end' is capped at `len'.  A negative `start' or `end' has `len' added
 * to it and is then floored at 0.  `start' is never capped from above.
 */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int lo = *start;
	int hi = *end;

	if (hi > len)
		hi = len;
	else if (hi < 0)
		hi += len;
	if (hi < 0)
		hi = 0;

	if (lo < 0) {
		lo += len;
		if (lo < 0)
			lo = 0;
	}

	*start = lo;
	*end = hi;
}
1338
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339static long
Fred Drakeba096332000-07-09 07:04:36 +00001340string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001342 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001343 int len = PyString_GET_SIZE(self);
1344 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001347 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001348 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 return -2;
1350 if (PyString_Check(subobj)) {
1351 sub = PyString_AS_STRING(subobj);
1352 n = PyString_GET_SIZE(subobj);
1353 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001354#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 else if (PyUnicode_Check(subobj))
1356 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001357#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001359 return -2;
1360
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001361 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362
Guido van Rossum4c08d552000-03-10 22:55:18 +00001363 if (dir > 0) {
1364 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001366 last -= n;
1367 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001368 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001369 return (long)i;
1370 }
1371 else {
1372 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001373
Guido van Rossum4c08d552000-03-10 22:55:18 +00001374 if (n == 0 && i <= last)
1375 return (long)last;
1376 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001377 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001378 return (long)j;
1379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001380
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001381 return -1;
1382}
1383
1384
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001385PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001386"S.find(sub [,start [,end]]) -> int\n\
1387\n\
1388Return the lowest index in S where substring sub is found,\n\
1389such that sub is contained within s[start,end]. Optional\n\
1390arguments start and end are interpreted as in slice notation.\n\
1391\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001392Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393
1394static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001395string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001397 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398 if (result == -2)
1399 return NULL;
1400 return PyInt_FromLong(result);
1401}
1402
1403
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001404PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001405"S.index(sub [,start [,end]]) -> int\n\
1406\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001407Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408
1409static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001410string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001411{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001412 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413 if (result == -2)
1414 return NULL;
1415 if (result == -1) {
1416 PyErr_SetString(PyExc_ValueError,
1417 "substring not found in string.index");
1418 return NULL;
1419 }
1420 return PyInt_FromLong(result);
1421}
1422
1423
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001424PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425"S.rfind(sub [,start [,end]]) -> int\n\
1426\n\
1427Return the highest index in S where substring sub is found,\n\
1428such that sub is contained within s[start,end]. Optional\n\
1429arguments start and end are interpreted as in slice notation.\n\
1430\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001431Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432
1433static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001434string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001436 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437 if (result == -2)
1438 return NULL;
1439 return PyInt_FromLong(result);
1440}
1441
1442
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001443PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444"S.rindex(sub [,start [,end]]) -> int\n\
1445\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001446Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447
1448static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001449string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001451 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452 if (result == -2)
1453 return NULL;
1454 if (result == -1) {
1455 PyErr_SetString(PyExc_ValueError,
1456 "substring not found in string.rindex");
1457 return NULL;
1458 }
1459 return PyInt_FromLong(result);
1460}
1461
1462
1463static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001464do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1465{
1466 char *s = PyString_AS_STRING(self);
1467 int len = PyString_GET_SIZE(self);
1468 char *sep = PyString_AS_STRING(sepobj);
1469 int seplen = PyString_GET_SIZE(sepobj);
1470 int i, j;
1471
1472 i = 0;
1473 if (striptype != RIGHTSTRIP) {
1474 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1475 i++;
1476 }
1477 }
1478
1479 j = len;
1480 if (striptype != LEFTSTRIP) {
1481 do {
1482 j--;
1483 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1484 j++;
1485 }
1486
1487 if (i == 0 && j == len && PyString_CheckExact(self)) {
1488 Py_INCREF(self);
1489 return (PyObject*)self;
1490 }
1491 else
1492 return PyString_FromStringAndSize(s+i, j-i);
1493}
1494
1495
1496static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001497do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498{
1499 char *s = PyString_AS_STRING(self);
1500 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001501
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001502 i = 0;
1503 if (striptype != RIGHTSTRIP) {
1504 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1505 i++;
1506 }
1507 }
1508
1509 j = len;
1510 if (striptype != LEFTSTRIP) {
1511 do {
1512 j--;
1513 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1514 j++;
1515 }
1516
Tim Peters8fa5dd02001-09-12 02:18:30 +00001517 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001518 Py_INCREF(self);
1519 return (PyObject*)self;
1520 }
1521 else
1522 return PyString_FromStringAndSize(s+i, j-i);
1523}
1524
1525
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001526static PyObject *
1527do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1528{
1529 PyObject *sep = NULL;
1530
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001531 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001532 return NULL;
1533
1534 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001535 if (PyString_Check(sep))
1536 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001537#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001538 else if (PyUnicode_Check(sep)) {
1539 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1540 PyObject *res;
1541 if (uniself==NULL)
1542 return NULL;
1543 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1544 striptype, sep);
1545 Py_DECREF(uniself);
1546 return res;
1547 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001548#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001549 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001550 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001551#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001552 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001553#else
1554 "%s arg must be None or str",
1555#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001556 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001557 return NULL;
1558 }
1559 return do_xstrip(self, striptype, sep);
1560 }
1561
1562 return do_strip(self, striptype);
1563}
1564
1565
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001566PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001567"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568\n\
1569Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001570whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001571If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001572If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573
1574static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001575string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001577 if (PyTuple_GET_SIZE(args) == 0)
1578 return do_strip(self, BOTHSTRIP); /* Common case */
1579 else
1580 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001581}
1582
1583
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001584PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001585"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001587Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001588If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001589If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590
1591static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001592string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001594 if (PyTuple_GET_SIZE(args) == 0)
1595 return do_strip(self, LEFTSTRIP); /* Common case */
1596 else
1597 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598}
1599
1600
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001601PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001602"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001604Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001605If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001606If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607
1608static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001609string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001611 if (PyTuple_GET_SIZE(args) == 0)
1612 return do_strip(self, RIGHTSTRIP); /* Common case */
1613 else
1614 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615}
1616
1617
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001618PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619"S.lower() -> string\n\
1620\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001621Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622
1623static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001624string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625{
1626 char *s = PyString_AS_STRING(self), *s_new;
1627 int i, n = PyString_GET_SIZE(self);
1628 PyObject *new;
1629
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 new = PyString_FromStringAndSize(NULL, n);
1631 if (new == NULL)
1632 return NULL;
1633 s_new = PyString_AsString(new);
1634 for (i = 0; i < n; i++) {
1635 int c = Py_CHARMASK(*s++);
1636 if (isupper(c)) {
1637 *s_new = tolower(c);
1638 } else
1639 *s_new = c;
1640 s_new++;
1641 }
1642 return new;
1643}
1644
1645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001646PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001647"S.upper() -> string\n\
1648\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001649Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650
1651static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001652string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653{
1654 char *s = PyString_AS_STRING(self), *s_new;
1655 int i, n = PyString_GET_SIZE(self);
1656 PyObject *new;
1657
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 new = PyString_FromStringAndSize(NULL, n);
1659 if (new == NULL)
1660 return NULL;
1661 s_new = PyString_AsString(new);
1662 for (i = 0; i < n; i++) {
1663 int c = Py_CHARMASK(*s++);
1664 if (islower(c)) {
1665 *s_new = toupper(c);
1666 } else
1667 *s_new = c;
1668 s_new++;
1669 }
1670 return new;
1671}
1672
1673
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001674PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001675"S.title() -> string\n\
1676\n\
1677Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001678characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679
1680static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001681string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001682{
1683 char *s = PyString_AS_STRING(self), *s_new;
1684 int i, n = PyString_GET_SIZE(self);
1685 int previous_is_cased = 0;
1686 PyObject *new;
1687
Guido van Rossum4c08d552000-03-10 22:55:18 +00001688 new = PyString_FromStringAndSize(NULL, n);
1689 if (new == NULL)
1690 return NULL;
1691 s_new = PyString_AsString(new);
1692 for (i = 0; i < n; i++) {
1693 int c = Py_CHARMASK(*s++);
1694 if (islower(c)) {
1695 if (!previous_is_cased)
1696 c = toupper(c);
1697 previous_is_cased = 1;
1698 } else if (isupper(c)) {
1699 if (previous_is_cased)
1700 c = tolower(c);
1701 previous_is_cased = 1;
1702 } else
1703 previous_is_cased = 0;
1704 *s_new++ = c;
1705 }
1706 return new;
1707}
1708
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001709PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001710"S.capitalize() -> string\n\
1711\n\
1712Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001713capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714
1715static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001716string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717{
1718 char *s = PyString_AS_STRING(self), *s_new;
1719 int i, n = PyString_GET_SIZE(self);
1720 PyObject *new;
1721
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722 new = PyString_FromStringAndSize(NULL, n);
1723 if (new == NULL)
1724 return NULL;
1725 s_new = PyString_AsString(new);
1726 if (0 < n) {
1727 int c = Py_CHARMASK(*s++);
1728 if (islower(c))
1729 *s_new = toupper(c);
1730 else
1731 *s_new = c;
1732 s_new++;
1733 }
1734 for (i = 1; i < n; i++) {
1735 int c = Py_CHARMASK(*s++);
1736 if (isupper(c))
1737 *s_new = tolower(c);
1738 else
1739 *s_new = c;
1740 s_new++;
1741 }
1742 return new;
1743}
1744
1745
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001746PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747"S.count(sub[, start[, end]]) -> int\n\
1748\n\
1749Return the number of occurrences of substring sub in string\n\
1750S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001751interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752
1753static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001754string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757 int len = PyString_GET_SIZE(self), n;
1758 int i = 0, last = INT_MAX;
1759 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761
Guido van Rossumc6821402000-05-08 14:08:05 +00001762 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1763 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001765
Guido van Rossum4c08d552000-03-10 22:55:18 +00001766 if (PyString_Check(subobj)) {
1767 sub = PyString_AS_STRING(subobj);
1768 n = PyString_GET_SIZE(subobj);
1769 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001770#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001771 else if (PyUnicode_Check(subobj)) {
1772 int count;
1773 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1774 if (count == -1)
1775 return NULL;
1776 else
1777 return PyInt_FromLong((long) count);
1778 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001779#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001780 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1781 return NULL;
1782
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001783 string_adjust_indices(&i, &last, len);
1784
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785 m = last + 1 - n;
1786 if (n == 0)
1787 return PyInt_FromLong((long) (m-i));
1788
1789 r = 0;
1790 while (i < m) {
1791 if (!memcmp(s+i, sub, n)) {
1792 r++;
1793 i += n;
1794 } else {
1795 i++;
1796 }
1797 }
1798 return PyInt_FromLong((long) r);
1799}
1800
1801
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001802PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803"S.swapcase() -> string\n\
1804\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001805Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001806converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807
1808static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001809string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810{
1811 char *s = PyString_AS_STRING(self), *s_new;
1812 int i, n = PyString_GET_SIZE(self);
1813 PyObject *new;
1814
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815 new = PyString_FromStringAndSize(NULL, n);
1816 if (new == NULL)
1817 return NULL;
1818 s_new = PyString_AsString(new);
1819 for (i = 0; i < n; i++) {
1820 int c = Py_CHARMASK(*s++);
1821 if (islower(c)) {
1822 *s_new = toupper(c);
1823 }
1824 else if (isupper(c)) {
1825 *s_new = tolower(c);
1826 }
1827 else
1828 *s_new = c;
1829 s_new++;
1830 }
1831 return new;
1832}
1833
1834
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001835PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836"S.translate(table [,deletechars]) -> string\n\
1837\n\
1838Return a copy of the string S, where all characters occurring\n\
1839in the optional argument deletechars are removed, and the\n\
1840remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001841translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842
1843static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001844string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001846 register char *input, *output;
1847 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848 register int i, c, changed = 0;
1849 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001850 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851 int inlen, tablen, dellen = 0;
1852 PyObject *result;
1853 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001854 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855
Guido van Rossum4c08d552000-03-10 22:55:18 +00001856 if (!PyArg_ParseTuple(args, "O|O:translate",
1857 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859
1860 if (PyString_Check(tableobj)) {
1861 table1 = PyString_AS_STRING(tableobj);
1862 tablen = PyString_GET_SIZE(tableobj);
1863 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001864#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001866 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001867 parameter; instead a mapping to None will cause characters
1868 to be deleted. */
1869 if (delobj != NULL) {
1870 PyErr_SetString(PyExc_TypeError,
1871 "deletions are implemented differently for unicode");
1872 return NULL;
1873 }
1874 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1875 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001876#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879
1880 if (delobj != NULL) {
1881 if (PyString_Check(delobj)) {
1882 del_table = PyString_AS_STRING(delobj);
1883 dellen = PyString_GET_SIZE(delobj);
1884 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001885#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001886 else if (PyUnicode_Check(delobj)) {
1887 PyErr_SetString(PyExc_TypeError,
1888 "deletions are implemented differently for unicode");
1889 return NULL;
1890 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001891#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001892 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1893 return NULL;
1894
1895 if (tablen != 256) {
1896 PyErr_SetString(PyExc_ValueError,
1897 "translation table must be 256 characters long");
1898 return NULL;
1899 }
1900 }
1901 else {
1902 del_table = NULL;
1903 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904 }
1905
1906 table = table1;
1907 inlen = PyString_Size(input_obj);
1908 result = PyString_FromStringAndSize((char *)NULL, inlen);
1909 if (result == NULL)
1910 return NULL;
1911 output_start = output = PyString_AsString(result);
1912 input = PyString_AsString(input_obj);
1913
1914 if (dellen == 0) {
1915 /* If no deletions are required, use faster code */
1916 for (i = inlen; --i >= 0; ) {
1917 c = Py_CHARMASK(*input++);
1918 if (Py_CHARMASK((*output++ = table[c])) != c)
1919 changed = 1;
1920 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001921 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922 return result;
1923 Py_DECREF(result);
1924 Py_INCREF(input_obj);
1925 return input_obj;
1926 }
1927
1928 for (i = 0; i < 256; i++)
1929 trans_table[i] = Py_CHARMASK(table[i]);
1930
1931 for (i = 0; i < dellen; i++)
1932 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1933
1934 for (i = inlen; --i >= 0; ) {
1935 c = Py_CHARMASK(*input++);
1936 if (trans_table[c] != -1)
1937 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1938 continue;
1939 changed = 1;
1940 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001941 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 Py_DECREF(result);
1943 Py_INCREF(input_obj);
1944 return input_obj;
1945 }
1946 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001947 if (inlen > 0)
1948 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949 return result;
1950}
1951
1952
1953/* What follows is used for implementing replace(). Perry Stoll. */
1954
/*
   mymemfind

   memmem()-style search over arbitrary blocks of memory.

   Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
   bytes at PAT.  Returns the offset of that occurrence within MEM, or
   -1 when there is none.  A pattern longer than MEM can never match,
   so that case also yields -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest offset at which the whole pattern still fits. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; ++pos) {
        const char *here = mem + pos;

        /* Cheap first-byte test before paying for memcmp(). */
        if (*here != pat[0])
            continue;
        if (memcmp(here, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
1980
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning left
   to right with mymemfind():
       mem=1111  and pat==11 gives 2,
       mem=11111 and pat==11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        const int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;

        /* Resume just past the match that was found. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
2004
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means no limit.

   If STR is empty, if length of PAT is greater than length of STR, or
   if there are no occurrences of PAT to replace, then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must free it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Each replacement changes the length by delta bytes.  Growing
       replacements can push the total past INT_MAX; signed overflow is
       undefined and would lead to an undersized buffer, so refuse up
       front.  Shrinking replacements cannot overflow because the
       matches being removed all fit inside the input. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;

    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2089
2090
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002091PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092"S.replace (old, new[, maxsplit]) -> string\n\
2093\n\
2094Return a copy of string S with all occurrences of substring\n\
2095old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002096given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097
2098static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002099string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101 const char *str = PyString_AS_STRING(self), *sub, *repl;
2102 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002103 const int len = PyString_GET_SIZE(self);
2104 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002105 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 if (!PyArg_ParseTuple(args, "OO|i:replace",
2110 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002112
2113 if (PyString_Check(subobj)) {
2114 sub = PyString_AS_STRING(subobj);
2115 sub_len = PyString_GET_SIZE(subobj);
2116 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002117#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002118 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002119 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002120 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002121#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002122 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2123 return NULL;
2124
2125 if (PyString_Check(replobj)) {
2126 repl = PyString_AS_STRING(replobj);
2127 repl_len = PyString_GET_SIZE(replobj);
2128 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002129#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002131 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002132 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002133#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002134 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2135 return NULL;
2136
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002137 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002138 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139 return NULL;
2140 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002141 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142 if (new_s == NULL) {
2143 PyErr_NoMemory();
2144 return NULL;
2145 }
2146 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002147 if (PyString_CheckExact(self)) {
2148 /* we're returning another reference to self */
2149 new = (PyObject*)self;
2150 Py_INCREF(new);
2151 }
2152 else {
2153 new = PyString_FromStringAndSize(str, len);
2154 if (new == NULL)
2155 return NULL;
2156 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157 }
2158 else {
2159 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002160 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161 }
2162 return new;
2163}
2164
2165
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002166PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002167"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002169Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002171comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172
2173static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002174string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002178 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179 int plen;
2180 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002181 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002182 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183
Guido van Rossumc6821402000-05-08 14:08:05 +00002184 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2185 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186 return NULL;
2187 if (PyString_Check(subobj)) {
2188 prefix = PyString_AS_STRING(subobj);
2189 plen = PyString_GET_SIZE(subobj);
2190 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002191#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002192 else if (PyUnicode_Check(subobj)) {
2193 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002194 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002195 subobj, start, end, -1);
2196 if (rc == -1)
2197 return NULL;
2198 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002199 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002200 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002201#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 return NULL;
2204
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002205 string_adjust_indices(&start, &end, len);
2206
2207 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002208 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002210 if (end-start >= plen)
2211 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2212 else
2213 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214}
2215
2216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002217PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002218"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002220Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002222comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223
2224static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002225string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002227 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002229 const char* suffix;
2230 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002232 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002233 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234
Guido van Rossumc6821402000-05-08 14:08:05 +00002235 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2236 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002237 return NULL;
2238 if (PyString_Check(subobj)) {
2239 suffix = PyString_AS_STRING(subobj);
2240 slen = PyString_GET_SIZE(subobj);
2241 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002242#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002243 else if (PyUnicode_Check(subobj)) {
2244 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002245 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002246 subobj, start, end, +1);
2247 if (rc == -1)
2248 return NULL;
2249 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002250 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002251 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002252#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254 return NULL;
2255
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002256 string_adjust_indices(&start, &end, len);
2257
2258 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002259 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002261 if (end-slen > start)
2262 start = end - slen;
2263 if (end-start >= slen)
2264 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2265 else
2266 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267}
2268
2269
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002270PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002271"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002272\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002273Encodes S using the codec registered for encoding. encoding defaults\n\
2274to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002275handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002276a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002277
2278static PyObject *
2279string_encode(PyStringObject *self, PyObject *args)
2280{
2281 char *encoding = NULL;
2282 char *errors = NULL;
2283 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2284 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002285 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2286}
2287
2288
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002289PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002290"S.decode([encoding[,errors]]) -> object\n\
2291\n\
2292Decodes S using the codec registered for encoding. encoding defaults\n\
2293to the default encoding. errors may be given to set a different error\n\
2294handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002295a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002296
2297static PyObject *
2298string_decode(PyStringObject *self, PyObject *args)
2299{
2300 char *encoding = NULL;
2301 char *errors = NULL;
2302 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2303 return NULL;
2304 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002305}
2306
2307
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002308PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309"S.expandtabs([tabsize]) -> string\n\
2310\n\
2311Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002312If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002313
2314static PyObject*
2315string_expandtabs(PyStringObject *self, PyObject *args)
2316{
2317 const char *e, *p;
2318 char *q;
2319 int i, j;
2320 PyObject *u;
2321 int tabsize = 8;
2322
2323 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2324 return NULL;
2325
Thomas Wouters7e474022000-07-16 12:04:32 +00002326 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327 i = j = 0;
2328 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2329 for (p = PyString_AS_STRING(self); p < e; p++)
2330 if (*p == '\t') {
2331 if (tabsize > 0)
2332 j += tabsize - (j % tabsize);
2333 }
2334 else {
2335 j++;
2336 if (*p == '\n' || *p == '\r') {
2337 i += j;
2338 j = 0;
2339 }
2340 }
2341
2342 /* Second pass: create output string and fill it */
2343 u = PyString_FromStringAndSize(NULL, i + j);
2344 if (!u)
2345 return NULL;
2346
2347 j = 0;
2348 q = PyString_AS_STRING(u);
2349
2350 for (p = PyString_AS_STRING(self); p < e; p++)
2351 if (*p == '\t') {
2352 if (tabsize > 0) {
2353 i = tabsize - (j % tabsize);
2354 j += i;
2355 while (i--)
2356 *q++ = ' ';
2357 }
2358 }
2359 else {
2360 j++;
2361 *q++ = *p;
2362 if (*p == '\n' || *p == '\r')
2363 j = 0;
2364 }
2365
2366 return u;
2367}
2368
Tim Peters8fa5dd02001-09-12 02:18:30 +00002369static PyObject *
2370pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371{
2372 PyObject *u;
2373
2374 if (left < 0)
2375 left = 0;
2376 if (right < 0)
2377 right = 0;
2378
Tim Peters8fa5dd02001-09-12 02:18:30 +00002379 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380 Py_INCREF(self);
2381 return (PyObject *)self;
2382 }
2383
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002384 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002385 left + PyString_GET_SIZE(self) + right);
2386 if (u) {
2387 if (left)
2388 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002389 memcpy(PyString_AS_STRING(u) + left,
2390 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 PyString_GET_SIZE(self));
2392 if (right)
2393 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2394 fill, right);
2395 }
2396
2397 return u;
2398}
2399
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002400PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002401"S.ljust(width) -> string\n"
2402"\n"
2403"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002404"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002405
2406static PyObject *
2407string_ljust(PyStringObject *self, PyObject *args)
2408{
2409 int width;
2410 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2411 return NULL;
2412
Tim Peters8fa5dd02001-09-12 02:18:30 +00002413 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002414 Py_INCREF(self);
2415 return (PyObject*) self;
2416 }
2417
2418 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2419}
2420
2421
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002422PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002423"S.rjust(width) -> string\n"
2424"\n"
2425"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002426"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427
2428static PyObject *
2429string_rjust(PyStringObject *self, PyObject *args)
2430{
2431 int width;
2432 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2433 return NULL;
2434
Tim Peters8fa5dd02001-09-12 02:18:30 +00002435 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 Py_INCREF(self);
2437 return (PyObject*) self;
2438 }
2439
2440 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2441}
2442
2443
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002444PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002445"S.center(width) -> string\n"
2446"\n"
2447"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002448"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449
2450static PyObject *
2451string_center(PyStringObject *self, PyObject *args)
2452{
2453 int marg, left;
2454 int width;
2455
2456 if (!PyArg_ParseTuple(args, "i:center", &width))
2457 return NULL;
2458
Tim Peters8fa5dd02001-09-12 02:18:30 +00002459 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002460 Py_INCREF(self);
2461 return (PyObject*) self;
2462 }
2463
2464 marg = width - PyString_GET_SIZE(self);
2465 left = marg / 2 + (marg & width & 1);
2466
2467 return pad(self, left, marg - left, ' ');
2468}
2469
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002470PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002471"S.zfill(width) -> string\n"
2472"\n"
2473"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002474"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002475
2476static PyObject *
2477string_zfill(PyStringObject *self, PyObject *args)
2478{
2479 int fill;
2480 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002481 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002482
2483 int width;
2484 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2485 return NULL;
2486
2487 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002488 if (PyString_CheckExact(self)) {
2489 Py_INCREF(self);
2490 return (PyObject*) self;
2491 }
2492 else
2493 return PyString_FromStringAndSize(
2494 PyString_AS_STRING(self),
2495 PyString_GET_SIZE(self)
2496 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002497 }
2498
2499 fill = width - PyString_GET_SIZE(self);
2500
2501 s = pad(self, fill, 0, '0');
2502
2503 if (s == NULL)
2504 return NULL;
2505
2506 p = PyString_AS_STRING(s);
2507 if (p[fill] == '+' || p[fill] == '-') {
2508 /* move sign to beginning of string */
2509 p[0] = p[fill];
2510 p[fill] = '0';
2511 }
2512
2513 return (PyObject*) s;
2514}
2515
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002516PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002517"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002518"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002519"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002520"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002521
2522static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002523string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002524{
Fred Drakeba096332000-07-09 07:04:36 +00002525 register const unsigned char *p
2526 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002527 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 /* Shortcut for single character strings */
2530 if (PyString_GET_SIZE(self) == 1 &&
2531 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002532 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002534 /* Special case for empty strings */
2535 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002536 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002537
Guido van Rossum4c08d552000-03-10 22:55:18 +00002538 e = p + PyString_GET_SIZE(self);
2539 for (; p < e; p++) {
2540 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002541 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002543 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544}
2545
2546
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002547PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002548"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002549\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002550Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002551and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002552
2553static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002554string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002555{
Fred Drakeba096332000-07-09 07:04:36 +00002556 register const unsigned char *p
2557 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002558 register const unsigned char *e;
2559
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002560 /* Shortcut for single character strings */
2561 if (PyString_GET_SIZE(self) == 1 &&
2562 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002563 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002564
2565 /* Special case for empty strings */
2566 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002567 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002568
2569 e = p + PyString_GET_SIZE(self);
2570 for (; p < e; p++) {
2571 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002572 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002573 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002574 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002575}
2576
2577
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002578PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002579"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002580\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002581Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002582and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002583
2584static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002585string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002586{
Fred Drakeba096332000-07-09 07:04:36 +00002587 register const unsigned char *p
2588 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002589 register const unsigned char *e;
2590
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002591 /* Shortcut for single character strings */
2592 if (PyString_GET_SIZE(self) == 1 &&
2593 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002594 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002595
2596 /* Special case for empty strings */
2597 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002598 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002599
2600 e = p + PyString_GET_SIZE(self);
2601 for (; p < e; p++) {
2602 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002603 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002604 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002605 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002606}
2607
2608
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002609PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002610"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002612Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002613False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614
2615static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002616string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617{
Fred Drakeba096332000-07-09 07:04:36 +00002618 register const unsigned char *p
2619 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002620 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002621
Guido van Rossum4c08d552000-03-10 22:55:18 +00002622 /* Shortcut for single character strings */
2623 if (PyString_GET_SIZE(self) == 1 &&
2624 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002625 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002626
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002627 /* Special case for empty strings */
2628 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002629 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002630
Guido van Rossum4c08d552000-03-10 22:55:18 +00002631 e = p + PyString_GET_SIZE(self);
2632 for (; p < e; p++) {
2633 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002634 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002636 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002637}
2638
2639
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002640PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002641"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002642\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002643Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002644at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002645
2646static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002647string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002648{
Fred Drakeba096332000-07-09 07:04:36 +00002649 register const unsigned char *p
2650 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002651 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002652 int cased;
2653
Guido van Rossum4c08d552000-03-10 22:55:18 +00002654 /* Shortcut for single character strings */
2655 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002656 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002657
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002658 /* Special case for empty strings */
2659 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002660 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002661
Guido van Rossum4c08d552000-03-10 22:55:18 +00002662 e = p + PyString_GET_SIZE(self);
2663 cased = 0;
2664 for (; p < e; p++) {
2665 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002666 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002667 else if (!cased && islower(*p))
2668 cased = 1;
2669 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002670 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002671}
2672
2673
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002674PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002675"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002676\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002677Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002678at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002679
2680static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002681string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002682{
Fred Drakeba096332000-07-09 07:04:36 +00002683 register const unsigned char *p
2684 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002685 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002686 int cased;
2687
Guido van Rossum4c08d552000-03-10 22:55:18 +00002688 /* Shortcut for single character strings */
2689 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002690 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002691
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002692 /* Special case for empty strings */
2693 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002694 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002695
Guido van Rossum4c08d552000-03-10 22:55:18 +00002696 e = p + PyString_GET_SIZE(self);
2697 cased = 0;
2698 for (; p < e; p++) {
2699 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002700 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002701 else if (!cased && isupper(*p))
2702 cased = 1;
2703 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002704 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002705}
2706
2707
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002708PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002709"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002710\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002711Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002712may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002713ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002714
2715static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002716string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002717{
Fred Drakeba096332000-07-09 07:04:36 +00002718 register const unsigned char *p
2719 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002720 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002721 int cased, previous_is_cased;
2722
Guido van Rossum4c08d552000-03-10 22:55:18 +00002723 /* Shortcut for single character strings */
2724 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002725 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002726
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002727 /* Special case for empty strings */
2728 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002729 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002730
Guido van Rossum4c08d552000-03-10 22:55:18 +00002731 e = p + PyString_GET_SIZE(self);
2732 cased = 0;
2733 previous_is_cased = 0;
2734 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002735 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002736
2737 if (isupper(ch)) {
2738 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002739 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002740 previous_is_cased = 1;
2741 cased = 1;
2742 }
2743 else if (islower(ch)) {
2744 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002745 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002746 previous_is_cased = 1;
2747 cased = 1;
2748 }
2749 else
2750 previous_is_cased = 0;
2751 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002752 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002753}
2754
2755
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002756PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002757"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002758\n\
2759Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002760Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002761is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002762
2763#define SPLIT_APPEND(data, left, right) \
2764 str = PyString_FromStringAndSize(data + left, right - left); \
2765 if (!str) \
2766 goto onError; \
2767 if (PyList_Append(list, str)) { \
2768 Py_DECREF(str); \
2769 goto onError; \
2770 } \
2771 else \
2772 Py_DECREF(str);
2773
2774static PyObject*
2775string_splitlines(PyStringObject *self, PyObject *args)
2776{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002777 register int i;
2778 register int j;
2779 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002780 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002781 PyObject *list;
2782 PyObject *str;
2783 char *data;
2784
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002785 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002786 return NULL;
2787
2788 data = PyString_AS_STRING(self);
2789 len = PyString_GET_SIZE(self);
2790
Guido van Rossum4c08d552000-03-10 22:55:18 +00002791 list = PyList_New(0);
2792 if (!list)
2793 goto onError;
2794
2795 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002796 int eol;
2797
Guido van Rossum4c08d552000-03-10 22:55:18 +00002798 /* Find a line and append it */
2799 while (i < len && data[i] != '\n' && data[i] != '\r')
2800 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002801
2802 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002803 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002804 if (i < len) {
2805 if (data[i] == '\r' && i + 1 < len &&
2806 data[i+1] == '\n')
2807 i += 2;
2808 else
2809 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002810 if (keepends)
2811 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002812 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002813 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002814 j = i;
2815 }
2816 if (j < len) {
2817 SPLIT_APPEND(data, j, len);
2818 }
2819
2820 return list;
2821
2822 onError:
2823 Py_DECREF(list);
2824 return NULL;
2825}
2826
2827#undef SPLIT_APPEND
2828
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002829
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002830static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002831string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002832 /* Counterparts of the obsolete stropmodule functions; except
2833 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002834 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2835 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2836 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2837 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002838 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2839 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2840 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2841 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2842 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2843 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2844 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002845 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
2846 capitalize__doc__},
2847 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2848 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2849 endswith__doc__},
2850 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2851 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2852 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2853 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2854 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2855 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2856 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2857 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2858 startswith__doc__},
2859 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2860 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
2861 swapcase__doc__},
2862 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2863 translate__doc__},
2864 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2865 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2866 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2867 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2868 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
2869 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2870 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2871 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
2872 expandtabs__doc__},
2873 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
2874 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002875 {NULL, NULL} /* sentinel */
2876};
2877
Jeremy Hylton938ace62002-07-17 16:30:39 +00002878static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00002879str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2880
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002881static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002882string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002883{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002884 PyObject *x = NULL;
2885 static char *kwlist[] = {"object", 0};
2886
Guido van Rossumae960af2001-08-30 03:11:59 +00002887 if (type != &PyString_Type)
2888 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002889 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2890 return NULL;
2891 if (x == NULL)
2892 return PyString_FromString("");
2893 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002894}
2895
Guido van Rossumae960af2001-08-30 03:11:59 +00002896static PyObject *
2897str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2898{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002899 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002900 int n;
2901
2902 assert(PyType_IsSubtype(type, &PyString_Type));
2903 tmp = string_new(&PyString_Type, args, kwds);
2904 if (tmp == NULL)
2905 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002906 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002907 n = PyString_GET_SIZE(tmp);
2908 pnew = type->tp_alloc(type, n);
2909 if (pnew != NULL) {
2910 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002911 ((PyStringObject *)pnew)->ob_shash =
2912 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002913 ((PyStringObject *)pnew)->ob_sinterned =
2914 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002915 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002916 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002917 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002918}
2919
Guido van Rossumcacfc072002-05-24 19:01:59 +00002920static PyObject *
2921basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2922{
2923 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002924 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00002925 return NULL;
2926}
2927
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002928PyDoc_STRVAR(basestring_doc,
2929"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00002930
/* Type object for "basestring".  Almost every slot is empty: the type
   only supplies a docstring, a base type (PyBaseObject_Type) and a
   tp_new that always fails.  Py_TPFLAGS_BASETYPE allows other types to
   derive from it (PyString_Type names it as its tp_base). */
PyTypeObject PyBaseString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                          /* ob_size */
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
2973
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002974PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002975"str(object) -> string\n\
2976\n\
2977Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002978If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002979
/* Type object for "str".  Instances are variable-sized: the basic size
   is sizeof(PyStringObject) and each additional item is one char.
   The type derives from PyBaseString_Type and may itself be subclassed
   (Py_TPFLAGS_BASETYPE). */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                          /* ob_size */
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    (destructor)string_dealloc,                 /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)string_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    (reprfunc)string_str,                       /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3022
3023void
Fred Drakeba096332000-07-09 07:04:36 +00003024PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003025{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003026 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003027 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003028 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003029 if (w == NULL || !PyString_Check(*pv)) {
3030 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003031 *pv = NULL;
3032 return;
3033 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003034 v = string_concat((PyStringObject *) *pv, w);
3035 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003036 *pv = v;
3037}
3038
Guido van Rossum013142a1994-08-30 08:19:36 +00003039void
Fred Drakeba096332000-07-09 07:04:36 +00003040PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003041{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003042 PyString_Concat(pv, w);
3043 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003044}
3045
3046
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003047/* The following function breaks the notion that strings are immutable:
3048 it changes the size of a string. We get away with this only if there
3049 is only one module referencing the object. You can also think of it
3050 as creating a new string object and destroying the old one, only
3051 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003052 already be known to some other part of the code...
3053 Note that if there's not enough memory to resize the string, the original
3054 string object at *pv is deallocated, *pv is set to NULL, an "out of
3055 memory" exception is set, and -1 is returned. Else (on success) 0 is
3056 returned, and the value in *pv may or may not be the same as on input.
3057 As always, an extra byte is allocated for a trailing \0 byte (newsize
3058 does *not* include that), and a trailing \0 byte is stored.
3059*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003060
3061int
Fred Drakeba096332000-07-09 07:04:36 +00003062_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003063{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003064 register PyObject *v;
3065 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003066 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003067 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003068 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003069 Py_DECREF(v);
3070 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003071 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003072 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003073 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003074 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003075 _Py_ForgetReference(v);
3076 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003077 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003078 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003079 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003080 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003081 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003082 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003083 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003084 _Py_NewReference(*pv);
3085 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003086 sv->ob_size = newsize;
3087 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003088 return 0;
3089}
Guido van Rossume5372401993-03-16 12:15:04 +00003090
3091/* Helpers for formatstring */
3092
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003093static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003094getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003095{
3096 int argidx = *p_argidx;
3097 if (argidx < arglen) {
3098 (*p_argidx)++;
3099 if (arglen < 0)
3100 return args;
3101 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003102 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003103 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003104 PyErr_SetString(PyExc_TypeError,
3105 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003106 return NULL;
3107}
3108
/* Bit flags recording which format-specifier flag characters were seen:
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 * Each flag occupies its own bit so they can be OR-ed together.
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
3121
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003122static int
Fred Drakeba096332000-07-09 07:04:36 +00003123formatfloat(char *buf, size_t buflen, int flags,
3124 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003125{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003126 /* fmt = '%#.' + `prec` + `type`
3127 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003128 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003129 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003130 x = PyFloat_AsDouble(v);
3131 if (x == -1.0 && PyErr_Occurred()) {
3132 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003133 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003134 }
Guido van Rossume5372401993-03-16 12:15:04 +00003135 if (prec < 0)
3136 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003137 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3138 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003139 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3140 (flags&F_ALT) ? "#" : "",
3141 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003142 /* worst case length calc to ensure no buffer overrun:
3143 fmt = %#.<prec>g
3144 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003145 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003146 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3147 If prec=0 the effective precision is 1 (the leading digit is
3148 always given), therefore increase by one to 10+prec. */
3149 if (buflen <= (size_t)10 + (size_t)prec) {
3150 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003151 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003152 return -1;
3153 }
Tim Peters885d4572001-11-28 20:27:42 +00003154 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003155 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003156}
3157
Tim Peters38fd5b62000-09-21 05:43:11 +00003158/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3159 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3160 * Python's regular ints.
3161 * Return value: a new PyString*, or NULL if error.
3162 * . *pbuf is set to point into it,
3163 * *plen set to the # of chars following that.
3164 * Caller must decref it when done using pbuf.
3165 * The string starting at *pbuf is of the form
3166 * "-"? ("0x" | "0X")? digit+
3167 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003168 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003169 * There will be at least prec digits, zero-filled on the left if
3170 * necessary to get that many.
3171 * val object to be converted
3172 * flags bitmask of format flags; only F_ALT is looked at
3173 * prec minimum number of digits; 0-fill on left if needed
3174 * type a character in [duoxX]; u acts the same as d
3175 *
3176 * CAUTION: o, x and X conversions on regular ints can never
3177 * produce a '-' sign, but can for Python's unbounded ints.
3178 */
3179PyObject*
3180_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3181 char **pbuf, int *plen)
3182{
3183 PyObject *result = NULL;
3184 char *buf;
3185 int i;
3186 int sign; /* 1 if '-', else 0 */
3187 int len; /* number of characters */
3188 int numdigits; /* len == numnondigits + numdigits */
3189 int numnondigits = 0;
3190
3191 switch (type) {
3192 case 'd':
3193 case 'u':
3194 result = val->ob_type->tp_str(val);
3195 break;
3196 case 'o':
3197 result = val->ob_type->tp_as_number->nb_oct(val);
3198 break;
3199 case 'x':
3200 case 'X':
3201 numnondigits = 2;
3202 result = val->ob_type->tp_as_number->nb_hex(val);
3203 break;
3204 default:
3205 assert(!"'type' not in [duoxX]");
3206 }
3207 if (!result)
3208 return NULL;
3209
3210 /* To modify the string in-place, there can only be one reference. */
3211 if (result->ob_refcnt != 1) {
3212 PyErr_BadInternalCall();
3213 return NULL;
3214 }
3215 buf = PyString_AsString(result);
3216 len = PyString_Size(result);
3217 if (buf[len-1] == 'L') {
3218 --len;
3219 buf[len] = '\0';
3220 }
3221 sign = buf[0] == '-';
3222 numnondigits += sign;
3223 numdigits = len - numnondigits;
3224 assert(numdigits > 0);
3225
Tim Petersfff53252001-04-12 18:38:48 +00003226 /* Get rid of base marker unless F_ALT */
3227 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003228 /* Need to skip 0x, 0X or 0. */
3229 int skipped = 0;
3230 switch (type) {
3231 case 'o':
3232 assert(buf[sign] == '0');
3233 /* If 0 is only digit, leave it alone. */
3234 if (numdigits > 1) {
3235 skipped = 1;
3236 --numdigits;
3237 }
3238 break;
3239 case 'x':
3240 case 'X':
3241 assert(buf[sign] == '0');
3242 assert(buf[sign + 1] == 'x');
3243 skipped = 2;
3244 numnondigits -= 2;
3245 break;
3246 }
3247 if (skipped) {
3248 buf += skipped;
3249 len -= skipped;
3250 if (sign)
3251 buf[0] = '-';
3252 }
3253 assert(len == numnondigits + numdigits);
3254 assert(numdigits > 0);
3255 }
3256
3257 /* Fill with leading zeroes to meet minimum width. */
3258 if (prec > numdigits) {
3259 PyObject *r1 = PyString_FromStringAndSize(NULL,
3260 numnondigits + prec);
3261 char *b1;
3262 if (!r1) {
3263 Py_DECREF(result);
3264 return NULL;
3265 }
3266 b1 = PyString_AS_STRING(r1);
3267 for (i = 0; i < numnondigits; ++i)
3268 *b1++ = *buf++;
3269 for (i = 0; i < prec - numdigits; i++)
3270 *b1++ = '0';
3271 for (i = 0; i < numdigits; i++)
3272 *b1++ = *buf++;
3273 *b1 = '\0';
3274 Py_DECREF(result);
3275 result = r1;
3276 buf = PyString_AS_STRING(result);
3277 len = numnondigits + prec;
3278 }
3279
3280 /* Fix up case for hex conversions. */
3281 switch (type) {
3282 case 'x':
3283 /* Need to convert all upper case letters to lower case. */
3284 for (i = 0; i < len; i++)
3285 if (buf[i] >= 'A' && buf[i] <= 'F')
3286 buf[i] += 'a'-'A';
3287 break;
3288 case 'X':
3289 /* Need to convert 0x to 0X (and -0x to -0X). */
3290 if (buf[sign + 1] == 'x')
3291 buf[sign + 1] = 'X';
3292 break;
3293 }
3294 *pbuf = buf;
3295 *plen = len;
3296 return result;
3297}
3298
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003299static int
Fred Drakeba096332000-07-09 07:04:36 +00003300formatint(char *buf, size_t buflen, int flags,
3301 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003302{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003303 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003304 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3305 + 1 + 1 = 24 */
3306 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003307 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003308
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003309 x = PyInt_AsLong(v);
3310 if (x == -1 && PyErr_Occurred()) {
3311 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003312 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003313 }
Guido van Rossume5372401993-03-16 12:15:04 +00003314 if (prec < 0)
3315 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003316
3317 if ((flags & F_ALT) &&
3318 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003319 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003320 * of issues that cause pain:
3321 * - when 0 is being converted, the C standard leaves off
3322 * the '0x' or '0X', which is inconsistent with other
3323 * %#x/%#X conversions and inconsistent with Python's
3324 * hex() function
3325 * - there are platforms that violate the standard and
3326 * convert 0 with the '0x' or '0X'
3327 * (Metrowerks, Compaq Tru64)
3328 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003329 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003330 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003331 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003332 * We can achieve the desired consistency by inserting our
3333 * own '0x' or '0X' prefix, and substituting %x/%X in place
3334 * of %#x/%#X.
3335 *
3336 * Note that this is the same approach as used in
3337 * formatint() in unicodeobject.c
3338 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003339 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003340 type, prec, type);
3341 }
3342 else {
3343 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003344 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003345 prec, type);
3346 }
3347
Tim Peters38fd5b62000-09-21 05:43:11 +00003348 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003349 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3350 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003351 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003352 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003353 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003354 return -1;
3355 }
Tim Peters885d4572001-11-28 20:27:42 +00003356 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003357 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003358}
3359
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003360static int
Fred Drakeba096332000-07-09 07:04:36 +00003361formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003362{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003363 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003364 if (PyString_Check(v)) {
3365 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003366 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003367 }
3368 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003369 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003370 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003371 }
3372 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003373 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003374}
3375
Guido van Rossum013142a1994-08-30 08:19:36 +00003376
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003386
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003387PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003388PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003389{
3390 char *fmt, *res;
3391 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003392 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003393 PyObject *result, *orig_args;
3394#ifdef Py_USING_UNICODE
3395 PyObject *v, *w;
3396#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003397 PyObject *dict = NULL;
3398 if (format == NULL || !PyString_Check(format) || args == NULL) {
3399 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003400 return NULL;
3401 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003402 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003403 fmt = PyString_AS_STRING(format);
3404 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003405 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003406 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003407 if (result == NULL)
3408 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003409 res = PyString_AsString(result);
3410 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003411 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003412 argidx = 0;
3413 }
3414 else {
3415 arglen = -1;
3416 argidx = -2;
3417 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003418 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003419 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003420 while (--fmtcnt >= 0) {
3421 if (*fmt != '%') {
3422 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003423 rescnt = fmtcnt + 100;
3424 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003425 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003426 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003427 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003428 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003429 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003430 }
3431 *res++ = *fmt++;
3432 }
3433 else {
3434 /* Got a format specifier */
3435 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003436 int width = -1;
3437 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003438 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003439 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003440 PyObject *v = NULL;
3441 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003442 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003443 int sign;
3444 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003445 char formatbuf[FORMATBUFLEN];
3446 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003447#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003448 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003449 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003450#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003451
Guido van Rossumda9c2711996-12-05 21:58:58 +00003452 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003453 if (*fmt == '(') {
3454 char *keystart;
3455 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003456 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003457 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003458
3459 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003460 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003461 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003462 goto error;
3463 }
3464 ++fmt;
3465 --fmtcnt;
3466 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003467 /* Skip over balanced parentheses */
3468 while (pcount > 0 && --fmtcnt >= 0) {
3469 if (*fmt == ')')
3470 --pcount;
3471 else if (*fmt == '(')
3472 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003473 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003474 }
3475 keylen = fmt - keystart - 1;
3476 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003477 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003478 "incomplete format key");
3479 goto error;
3480 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003481 key = PyString_FromStringAndSize(keystart,
3482 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003483 if (key == NULL)
3484 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003485 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003486 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003487 args_owned = 0;
3488 }
3489 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003490 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003491 if (args == NULL) {
3492 goto error;
3493 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003494 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003495 arglen = -1;
3496 argidx = -2;
3497 }
Guido van Rossume5372401993-03-16 12:15:04 +00003498 while (--fmtcnt >= 0) {
3499 switch (c = *fmt++) {
3500 case '-': flags |= F_LJUST; continue;
3501 case '+': flags |= F_SIGN; continue;
3502 case ' ': flags |= F_BLANK; continue;
3503 case '#': flags |= F_ALT; continue;
3504 case '0': flags |= F_ZERO; continue;
3505 }
3506 break;
3507 }
3508 if (c == '*') {
3509 v = getnextarg(args, arglen, &argidx);
3510 if (v == NULL)
3511 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003512 if (!PyInt_Check(v)) {
3513 PyErr_SetString(PyExc_TypeError,
3514 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003515 goto error;
3516 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003517 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003518 if (width < 0) {
3519 flags |= F_LJUST;
3520 width = -width;
3521 }
Guido van Rossume5372401993-03-16 12:15:04 +00003522 if (--fmtcnt >= 0)
3523 c = *fmt++;
3524 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003525 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003526 width = c - '0';
3527 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003528 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003529 if (!isdigit(c))
3530 break;
3531 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003532 PyErr_SetString(
3533 PyExc_ValueError,
3534 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003535 goto error;
3536 }
3537 width = width*10 + (c - '0');
3538 }
3539 }
3540 if (c == '.') {
3541 prec = 0;
3542 if (--fmtcnt >= 0)
3543 c = *fmt++;
3544 if (c == '*') {
3545 v = getnextarg(args, arglen, &argidx);
3546 if (v == NULL)
3547 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003548 if (!PyInt_Check(v)) {
3549 PyErr_SetString(
3550 PyExc_TypeError,
3551 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003552 goto error;
3553 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003554 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003555 if (prec < 0)
3556 prec = 0;
3557 if (--fmtcnt >= 0)
3558 c = *fmt++;
3559 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003560 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003561 prec = c - '0';
3562 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003563 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003564 if (!isdigit(c))
3565 break;
3566 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003567 PyErr_SetString(
3568 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003569 "prec too big");
3570 goto error;
3571 }
3572 prec = prec*10 + (c - '0');
3573 }
3574 }
3575 } /* prec */
3576 if (fmtcnt >= 0) {
3577 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003578 if (--fmtcnt >= 0)
3579 c = *fmt++;
3580 }
3581 }
3582 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003583 PyErr_SetString(PyExc_ValueError,
3584 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003585 goto error;
3586 }
3587 if (c != '%') {
3588 v = getnextarg(args, arglen, &argidx);
3589 if (v == NULL)
3590 goto error;
3591 }
3592 sign = 0;
3593 fill = ' ';
3594 switch (c) {
3595 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003596 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003597 len = 1;
3598 break;
3599 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003600 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003601#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003602 if (PyUnicode_Check(v)) {
3603 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003604 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003605 goto unicode;
3606 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003607#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003608 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003609 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003610 else
3611 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003612 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003613 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003614 if (!PyString_Check(temp)) {
3615 PyErr_SetString(PyExc_TypeError,
3616 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003617 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003618 goto error;
3619 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003620 pbuf = PyString_AS_STRING(temp);
3621 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003622 if (prec >= 0 && len > prec)
3623 len = prec;
3624 break;
3625 case 'i':
3626 case 'd':
3627 case 'u':
3628 case 'o':
3629 case 'x':
3630 case 'X':
3631 if (c == 'i')
3632 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003633 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003634 temp = _PyString_FormatLong(v, flags,
3635 prec, c, &pbuf, &len);
3636 if (!temp)
3637 goto error;
3638 /* unbounded ints can always produce
3639 a sign character! */
3640 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003641 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003642 else {
3643 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003644 len = formatint(pbuf,
3645 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003646 flags, prec, c, v);
3647 if (len < 0)
3648 goto error;
3649 /* only d conversion is signed */
3650 sign = c == 'd';
3651 }
3652 if (flags & F_ZERO)
3653 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003654 break;
3655 case 'e':
3656 case 'E':
3657 case 'f':
3658 case 'g':
3659 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003660 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003661 len = formatfloat(pbuf, sizeof(formatbuf),
3662 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003663 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003664 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003665 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003666 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003667 fill = '0';
3668 break;
3669 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003670 pbuf = formatbuf;
3671 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003672 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003673 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003674 break;
3675 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003676 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003677 "unsupported format character '%c' (0x%x) "
3678 "at index %i",
3679 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003680 goto error;
3681 }
3682 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003683 if (*pbuf == '-' || *pbuf == '+') {
3684 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003685 len--;
3686 }
3687 else if (flags & F_SIGN)
3688 sign = '+';
3689 else if (flags & F_BLANK)
3690 sign = ' ';
3691 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003692 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003693 }
3694 if (width < len)
3695 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003696 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003697 reslen -= rescnt;
3698 rescnt = width + fmtcnt + 100;
3699 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003700 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003701 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003702 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003703 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003704 }
3705 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003706 if (fill != ' ')
3707 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003708 rescnt--;
3709 if (width > len)
3710 width--;
3711 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003712 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3713 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003714 assert(pbuf[1] == c);
3715 if (fill != ' ') {
3716 *res++ = *pbuf++;
3717 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003718 }
Tim Petersfff53252001-04-12 18:38:48 +00003719 rescnt -= 2;
3720 width -= 2;
3721 if (width < 0)
3722 width = 0;
3723 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003724 }
3725 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003726 do {
3727 --rescnt;
3728 *res++ = fill;
3729 } while (--width > len);
3730 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003731 if (fill == ' ') {
3732 if (sign)
3733 *res++ = sign;
3734 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003735 (c == 'x' || c == 'X')) {
3736 assert(pbuf[0] == '0');
3737 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003738 *res++ = *pbuf++;
3739 *res++ = *pbuf++;
3740 }
3741 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003742 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003743 res += len;
3744 rescnt -= len;
3745 while (--width >= len) {
3746 --rescnt;
3747 *res++ = ' ';
3748 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003749 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003750 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003751 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003752 goto error;
3753 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003754 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003755 } /* '%' */
3756 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003757 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003758 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003759 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003760 goto error;
3761 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003762 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003763 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003764 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003765 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003766 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003767
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003768#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003769 unicode:
3770 if (args_owned) {
3771 Py_DECREF(args);
3772 args_owned = 0;
3773 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003774 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003775 if (PyTuple_Check(orig_args) && argidx > 0) {
3776 PyObject *v;
3777 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3778 v = PyTuple_New(n);
3779 if (v == NULL)
3780 goto error;
3781 while (--n >= 0) {
3782 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3783 Py_INCREF(w);
3784 PyTuple_SET_ITEM(v, n, w);
3785 }
3786 args = v;
3787 } else {
3788 Py_INCREF(orig_args);
3789 args = orig_args;
3790 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003791 args_owned = 1;
3792 /* Take what we have of the result and let the Unicode formatting
3793 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003794 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003795 if (_PyString_Resize(&result, rescnt))
3796 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003797 fmtcnt = PyString_GET_SIZE(format) - \
3798 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003799 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3800 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003801 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003802 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003803 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003804 if (v == NULL)
3805 goto error;
3806 /* Paste what we have (result) to what the Unicode formatting
3807 function returned (v) and return the result (or error) */
3808 w = PyUnicode_Concat(result, v);
3809 Py_DECREF(result);
3810 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003811 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003812 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003813#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003814
Guido van Rossume5372401993-03-16 12:15:04 +00003815 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003816 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003817 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003818 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003819 }
Guido van Rossume5372401993-03-16 12:15:04 +00003820 return NULL;
3821}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003822
3823
Guido van Rossum2a61e741997-01-18 07:55:05 +00003824
/* Dictionary of interned strings.  It is created on first use by
 * PyString_InternInPlace(), which stores each interned string as both the
 * key and the value of its entry, and it is dropped again only by
 * _Py_ReleaseInternedStrings().
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
static PyObject *interned;
3838
3839void
Fred Drakeba096332000-07-09 07:04:36 +00003840PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003841{
3842 register PyStringObject *s = (PyStringObject *)(*p);
3843 PyObject *t;
3844 if (s == NULL || !PyString_Check(s))
3845 Py_FatalError("PyString_InternInPlace: strings only please!");
3846 if ((t = s->ob_sinterned) != NULL) {
3847 if (t == (PyObject *)s)
3848 return;
3849 Py_INCREF(t);
3850 *p = t;
3851 Py_DECREF(s);
3852 return;
3853 }
3854 if (interned == NULL) {
3855 interned = PyDict_New();
3856 if (interned == NULL)
3857 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003858 }
3859 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3860 Py_INCREF(t);
3861 *p = s->ob_sinterned = t;
3862 Py_DECREF(s);
3863 return;
3864 }
Tim Peters111f6092001-09-12 07:54:51 +00003865 /* Ensure that only true string objects appear in the intern dict,
3866 and as the value of ob_sinterned. */
3867 if (PyString_CheckExact(s)) {
3868 t = (PyObject *)s;
3869 if (PyDict_SetItem(interned, t, t) == 0) {
3870 s->ob_sinterned = t;
3871 return;
3872 }
3873 }
3874 else {
3875 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3876 PyString_GET_SIZE(s));
3877 if (t != NULL) {
3878 if (PyDict_SetItem(interned, t, t) == 0) {
3879 *p = s->ob_sinterned = t;
3880 Py_DECREF(s);
3881 return;
3882 }
3883 Py_DECREF(t);
3884 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003885 }
3886 PyErr_Clear();
3887}
3888
3889
3890PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003891PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003892{
3893 PyObject *s = PyString_FromString(cp);
3894 if (s == NULL)
3895 return NULL;
3896 PyString_InternInPlace(&s);
3897 return s;
3898}
3899
Guido van Rossum8cf04761997-08-02 02:57:45 +00003900void
Fred Drakeba096332000-07-09 07:04:36 +00003901PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003902{
3903 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003904 for (i = 0; i < UCHAR_MAX + 1; i++) {
3905 Py_XDECREF(characters[i]);
3906 characters[i] = NULL;
3907 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003908 Py_XDECREF(nullstring);
3909 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003910 if (interned) {
3911 int pos, changed;
3912 PyObject *key, *value;
3913 do {
3914 changed = 0;
3915 pos = 0;
3916 while (PyDict_Next(interned, &pos, &key, &value)) {
3917 if (key->ob_refcnt == 2 && key == value) {
3918 PyDict_DelItem(interned, key);
3919 changed = 1;
3920 }
3921 }
3922 } while (changed);
3923 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003924}
Barry Warsawa903ad982001-02-23 16:40:48 +00003925
Barry Warsawa903ad982001-02-23 16:40:48 +00003926void _Py_ReleaseInternedStrings(void)
3927{
3928 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003929 fprintf(stderr, "releasing interned strings\n");
3930 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003931 Py_DECREF(interned);
3932 interned = NULL;
3933 }
3934}