blob: a21e021d925e0dce77fe4ebb3a3adbee59a05d03 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
#ifdef COUNT_ALLOCS
/* Statistics: bumped each time a constructor below hands out the shared
   empty string (null_strings) or a shared one-character string
   (one_strings) instead of allocating a new object. */
int null_strings, one_strings;
#endif

/* Fallback for platforms without <limits.h>. */
#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
#define UCHAR_MAX 255
#endif

/* Cache of one-character strings, indexed by the character's value masked
   with UCHAR_MAX.  Entries start out NULL and are filled in by the string
   constructors the first time each one-character string is created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* Cached empty string; NULL until the first empty string is created. */
static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
18/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000019 For both PyString_FromString() and PyString_FromStringAndSize(), the
20 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000021 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000022
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000023 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000024 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000025
   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
28 For PyString_FromStringAndSize(), the string in the `str' parameter does
29 not have to be null-terminated. (Therefore it is safe to construct a
30 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
31 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
32 bytes (setting the last byte to the null terminating character) and you can
33 fill in the data yourself. If `str' is non-NULL then the resulting
34 PyString object must be treated as immutable and you must not fill in nor
35 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 The PyObject member `op->ob_size', which denotes the number of "extra
38 items" in a variable-size object, will contain the number of bytes
39 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
41 PyString_FromStringAndSize()) or the length of the string in the `str'
42 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000159#ifdef __va_copy
160 __va_copy(count, vargs);
161#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000162 count = vargs;
163#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000164#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000165 /* step 1: figure out how large a buffer we need */
166 for (f = format; *f; f++) {
167 if (*f == '%') {
168 const char* p = f;
169 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
170 ;
171
172 /* skip the 'l' in %ld, since it doesn't change the
173 width. although only %d is supported (see
174 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000175 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000176 if (*f == 'l' && *(f+1) == 'd')
177 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000178
Barry Warsawdadace02001-08-24 18:32:06 +0000179 switch (*f) {
180 case 'c':
181 (void)va_arg(count, int);
182 /* fall through... */
183 case '%':
184 n++;
185 break;
186 case 'd': case 'i': case 'x':
187 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000188 /* 20 bytes is enough to hold a 64-bit
189 integer. Decimal takes the most space.
190 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000191 n += 20;
192 break;
193 case 's':
194 s = va_arg(count, char*);
195 n += strlen(s);
196 break;
197 case 'p':
198 (void) va_arg(count, int);
199 /* maximum 64-bit pointer representation:
200 * 0xffffffffffffffff
201 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000202 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000203 */
204 n += 19;
205 break;
206 default:
207 /* if we stumble upon an unknown
208 formatting code, copy the rest of
209 the format string to the output
210 string. (we cannot just skip the
211 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000212 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000213 n += strlen(p);
214 goto expand;
215 }
216 } else
217 n++;
218 }
219 expand:
220 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000221 /* Since we've analyzed how much space we need for the worst case,
222 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000223 string = PyString_FromStringAndSize(NULL, n);
224 if (!string)
225 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000226
Barry Warsawdadace02001-08-24 18:32:06 +0000227 s = PyString_AsString(string);
228
229 for (f = format; *f; f++) {
230 if (*f == '%') {
231 const char* p = f++;
232 int i, longflag = 0;
233 /* parse the width.precision part (we're only
234 interested in the precision value, if any) */
235 n = 0;
236 while (isdigit(Py_CHARMASK(*f)))
237 n = (n*10) + *f++ - '0';
238 if (*f == '.') {
239 f++;
240 n = 0;
241 while (isdigit(Py_CHARMASK(*f)))
242 n = (n*10) + *f++ - '0';
243 }
244 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
245 f++;
246 /* handle the long flag, but only for %ld. others
247 can be added when necessary. */
248 if (*f == 'l' && *(f+1) == 'd') {
249 longflag = 1;
250 ++f;
251 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000252
Barry Warsawdadace02001-08-24 18:32:06 +0000253 switch (*f) {
254 case 'c':
255 *s++ = va_arg(vargs, int);
256 break;
257 case 'd':
258 if (longflag)
259 sprintf(s, "%ld", va_arg(vargs, long));
260 else
261 sprintf(s, "%d", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'i':
265 sprintf(s, "%i", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 'x':
269 sprintf(s, "%x", va_arg(vargs, int));
270 s += strlen(s);
271 break;
272 case 's':
273 p = va_arg(vargs, char*);
274 i = strlen(p);
275 if (n > 0 && i > n)
276 i = n;
277 memcpy(s, p, i);
278 s += i;
279 break;
280 case 'p':
281 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000282 /* %p is ill-defined: ensure leading 0x. */
283 if (s[1] == 'X')
284 s[1] = 'x';
285 else if (s[1] != 'x') {
286 memmove(s+2, s, strlen(s)+1);
287 s[0] = '0';
288 s[1] = 'x';
289 }
Barry Warsawdadace02001-08-24 18:32:06 +0000290 s += strlen(s);
291 break;
292 case '%':
293 *s++ = '%';
294 break;
295 default:
296 strcpy(s, p);
297 s += strlen(s);
298 goto end;
299 }
300 } else
301 *s++ = *f;
302 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000303
Barry Warsawdadace02001-08-24 18:32:06 +0000304 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000305 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000306 return string;
307}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000308
Barry Warsawdadace02001-08-24 18:32:06 +0000309PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000310PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000311{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000312 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000313 va_list vargs;
314
315#ifdef HAVE_STDARG_PROTOTYPES
316 va_start(vargs, format);
317#else
318 va_start(vargs);
319#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000320 ret = PyString_FromFormatV(format, vargs);
321 va_end(vargs);
322 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000323}
324
325
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000326PyObject *PyString_Decode(const char *s,
327 int size,
328 const char *encoding,
329 const char *errors)
330{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000331 PyObject *v, *str;
332
333 str = PyString_FromStringAndSize(s, size);
334 if (str == NULL)
335 return NULL;
336 v = PyString_AsDecodedString(str, encoding, errors);
337 Py_DECREF(str);
338 return v;
339}
340
341PyObject *PyString_AsDecodedObject(PyObject *str,
342 const char *encoding,
343 const char *errors)
344{
345 PyObject *v;
346
347 if (!PyString_Check(str)) {
348 PyErr_BadArgument();
349 goto onError;
350 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000351
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000352 if (encoding == NULL) {
353#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000355#else
356 PyErr_SetString(PyExc_ValueError, "no encoding specified");
357 goto onError;
358#endif
359 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360
361 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000362 v = PyCodec_Decode(str, encoding, errors);
363 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365
366 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000367
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000368 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369 return NULL;
370}
371
372PyObject *PyString_AsDecodedString(PyObject *str,
373 const char *encoding,
374 const char *errors)
375{
376 PyObject *v;
377
378 v = PyString_AsDecodedObject(str, encoding, errors);
379 if (v == NULL)
380 goto onError;
381
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000382#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000383 /* Convert Unicode to a string using the default encoding */
384 if (PyUnicode_Check(v)) {
385 PyObject *temp = v;
386 v = PyUnicode_AsEncodedString(v, NULL, NULL);
387 Py_DECREF(temp);
388 if (v == NULL)
389 goto onError;
390 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000391#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000392 if (!PyString_Check(v)) {
393 PyErr_Format(PyExc_TypeError,
394 "decoder did not return a string object (type=%.400s)",
395 v->ob_type->tp_name);
396 Py_DECREF(v);
397 goto onError;
398 }
399
400 return v;
401
402 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 return NULL;
404}
405
406PyObject *PyString_Encode(const char *s,
407 int size,
408 const char *encoding,
409 const char *errors)
410{
411 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000412
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000413 str = PyString_FromStringAndSize(s, size);
414 if (str == NULL)
415 return NULL;
416 v = PyString_AsEncodedString(str, encoding, errors);
417 Py_DECREF(str);
418 return v;
419}
420
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000422 const char *encoding,
423 const char *errors)
424{
425 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000426
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 if (!PyString_Check(str)) {
428 PyErr_BadArgument();
429 goto onError;
430 }
431
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000432 if (encoding == NULL) {
433#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000435#else
436 PyErr_SetString(PyExc_ValueError, "no encoding specified");
437 goto onError;
438#endif
439 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000440
441 /* Encode via the codec registry */
442 v = PyCodec_Encode(str, encoding, errors);
443 if (v == NULL)
444 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000445
446 return v;
447
448 onError:
449 return NULL;
450}
451
452PyObject *PyString_AsEncodedString(PyObject *str,
453 const char *encoding,
454 const char *errors)
455{
456 PyObject *v;
457
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000458 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000459 if (v == NULL)
460 goto onError;
461
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 /* Convert Unicode to a string using the default encoding */
464 if (PyUnicode_Check(v)) {
465 PyObject *temp = v;
466 v = PyUnicode_AsEncodedString(v, NULL, NULL);
467 Py_DECREF(temp);
468 if (v == NULL)
469 goto onError;
470 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000471#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 if (!PyString_Check(v)) {
473 PyErr_Format(PyExc_TypeError,
474 "encoder did not return a string object (type=%.400s)",
475 v->ob_type->tp_name);
476 Py_DECREF(v);
477 goto onError;
478 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000479
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000480 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000481
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000482 onError:
483 return NULL;
484}
485
Guido van Rossum234f9421993-06-17 12:35:49 +0000486static void
Fred Drakeba096332000-07-09 07:04:36 +0000487string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000488{
Guido van Rossum9475a232001-10-05 20:51:39 +0000489 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000490}
491
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000492/* Unescape a backslash-escaped string. If unicode is non-zero,
493 the string is a u-literal. If recode_encoding is non-zero,
494 the string is UTF-8 encoded and should be re-encoded in the
495 specified encoding. */
496
497PyObject *PyString_DecodeEscape(const char *s,
498 int len,
499 const char *errors,
500 int unicode,
501 const char *recode_encoding)
502{
503 int c;
504 char *p, *buf;
505 const char *end;
506 PyObject *v;
507 v = PyString_FromStringAndSize((char *)NULL,
508 recode_encoding ? 4*len:len);
509 if (v == NULL)
510 return NULL;
511 p = buf = PyString_AsString(v);
512 end = s + len;
513 while (s < end) {
514 if (*s != '\\') {
515#ifdef Py_USING_UNICODE
516 if (recode_encoding && (*s & 0x80)) {
517 PyObject *u, *w;
518 char *r;
519 const char* t;
520 int rn;
521 t = s;
522 /* Decode non-ASCII bytes as UTF-8. */
523 while (t < end && (*t & 0x80)) t++;
524 u = PyUnicode_DecodeUTF8(s, t - s, errors);
525 if(!u) goto failed;
526
527 /* Recode them in target encoding. */
528 w = PyUnicode_AsEncodedString(
529 u, recode_encoding, errors);
530 Py_DECREF(u);
531 if (!w) goto failed;
532
533 /* Append bytes to output buffer. */
534 r = PyString_AsString(w);
535 rn = PyString_Size(w);
536 memcpy(p, r, rn);
537 p += rn;
538 Py_DECREF(w);
539 s = t;
540 } else {
541 *p++ = *s++;
542 }
543#else
544 *p++ = *s++;
545#endif
546 continue;
547 }
548 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000549 if (s==end) {
550 PyErr_SetString(PyExc_ValueError,
551 "Trailing \\ in string");
552 goto failed;
553 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000554 switch (*s++) {
555 /* XXX This assumes ASCII! */
556 case '\n': break;
557 case '\\': *p++ = '\\'; break;
558 case '\'': *p++ = '\''; break;
559 case '\"': *p++ = '\"'; break;
560 case 'b': *p++ = '\b'; break;
561 case 'f': *p++ = '\014'; break; /* FF */
562 case 't': *p++ = '\t'; break;
563 case 'n': *p++ = '\n'; break;
564 case 'r': *p++ = '\r'; break;
565 case 'v': *p++ = '\013'; break; /* VT */
566 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
567 case '0': case '1': case '2': case '3':
568 case '4': case '5': case '6': case '7':
569 c = s[-1] - '0';
570 if ('0' <= *s && *s <= '7') {
571 c = (c<<3) + *s++ - '0';
572 if ('0' <= *s && *s <= '7')
573 c = (c<<3) + *s++ - '0';
574 }
575 *p++ = c;
576 break;
577 case 'x':
578 if (isxdigit(Py_CHARMASK(s[0]))
579 && isxdigit(Py_CHARMASK(s[1]))) {
580 unsigned int x = 0;
581 c = Py_CHARMASK(*s);
582 s++;
583 if (isdigit(c))
584 x = c - '0';
585 else if (islower(c))
586 x = 10 + c - 'a';
587 else
588 x = 10 + c - 'A';
589 x = x << 4;
590 c = Py_CHARMASK(*s);
591 s++;
592 if (isdigit(c))
593 x += c - '0';
594 else if (islower(c))
595 x += 10 + c - 'a';
596 else
597 x += 10 + c - 'A';
598 *p++ = x;
599 break;
600 }
601 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000602 PyErr_SetString(PyExc_ValueError,
603 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000604 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000605 }
606 if (strcmp(errors, "replace") == 0) {
607 *p++ = '?';
608 } else if (strcmp(errors, "ignore") == 0)
609 /* do nothing */;
610 else {
611 PyErr_Format(PyExc_ValueError,
612 "decoding error; "
613 "unknown error handling code: %.400s",
614 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000615 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000616 }
617#ifndef Py_USING_UNICODE
618 case 'u':
619 case 'U':
620 case 'N':
621 if (unicode) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 com_error(com, PyExc_ValueError,
623 "Unicode escapes not legal "
624 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000625 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000626 }
627#endif
628 default:
629 *p++ = '\\';
630 *p++ = s[-1];
631 break;
632 }
633 }
634 _PyString_Resize(&v, (int)(p - buf));
635 return v;
636 failed:
637 Py_DECREF(v);
638 return NULL;
639}
640
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000641static int
642string_getsize(register PyObject *op)
643{
644 char *s;
645 int len;
646 if (PyString_AsStringAndSize(op, &s, &len))
647 return -1;
648 return len;
649}
650
651static /*const*/ char *
652string_getbuffer(register PyObject *op)
653{
654 char *s;
655 int len;
656 if (PyString_AsStringAndSize(op, &s, &len))
657 return NULL;
658 return s;
659}
660
Guido van Rossumd7047b31995-01-02 19:07:15 +0000661int
Fred Drakeba096332000-07-09 07:04:36 +0000662PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000664 if (!PyString_Check(op))
665 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000666 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667}
668
669/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000670PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000671{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000672 if (!PyString_Check(op))
673 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000674 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675}
676
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000677int
678PyString_AsStringAndSize(register PyObject *obj,
679 register char **s,
680 register int *len)
681{
682 if (s == NULL) {
683 PyErr_BadInternalCall();
684 return -1;
685 }
686
687 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000688#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000689 if (PyUnicode_Check(obj)) {
690 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
691 if (obj == NULL)
692 return -1;
693 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000694 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000695#endif
696 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697 PyErr_Format(PyExc_TypeError,
698 "expected string or Unicode object, "
699 "%.200s found", obj->ob_type->tp_name);
700 return -1;
701 }
702 }
703
704 *s = PyString_AS_STRING(obj);
705 if (len != NULL)
706 *len = PyString_GET_SIZE(obj);
707 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
708 PyErr_SetString(PyExc_TypeError,
709 "expected string without null bytes");
710 return -1;
711 }
712 return 0;
713}
714
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715/* Methods */
716
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000717static int
Fred Drakeba096332000-07-09 07:04:36 +0000718string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719{
720 int i;
721 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000722 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000723
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000724 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000725 if (! PyString_CheckExact(op)) {
726 int ret;
727 /* A str subclass may have its own __str__ method. */
728 op = (PyStringObject *) PyObject_Str((PyObject *)op);
729 if (op == NULL)
730 return -1;
731 ret = string_print(op, fp, flags);
732 Py_DECREF(op);
733 return ret;
734 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000737 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000739
Thomas Wouters7e474022000-07-16 12:04:32 +0000740 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000741 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000742 if (memchr(op->ob_sval, '\'', op->ob_size) &&
743 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000744 quote = '"';
745
746 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747 for (i = 0; i < op->ob_size; i++) {
748 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000749 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000750 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000751 else if (c == '\t')
752 fprintf(fp, "\\t");
753 else if (c == '\n')
754 fprintf(fp, "\\n");
755 else if (c == '\r')
756 fprintf(fp, "\\r");
757 else if (c < ' ' || c >= 0x7f)
758 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000759 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000760 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000761 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000762 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000763 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000764}
765
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000766PyObject *
767PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000769 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000770 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
771 PyObject *v;
772 if (newsize > INT_MAX) {
773 PyErr_SetString(PyExc_OverflowError,
774 "string is too large to make repr");
775 }
776 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000777 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000778 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 }
780 else {
781 register int i;
782 register char c;
783 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000784 int quote;
785
Thomas Wouters7e474022000-07-16 12:04:32 +0000786 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000787 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000788 if (smartquotes &&
789 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000790 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791 quote = '"';
792
Tim Peters9161c8b2001-12-03 01:55:38 +0000793 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000795 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000796 /* There's at least enough room for a hex escape
797 and a closing quote. */
798 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000800 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000801 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000802 else if (c == '\t')
803 *p++ = '\\', *p++ = 't';
804 else if (c == '\n')
805 *p++ = '\\', *p++ = 'n';
806 else if (c == '\r')
807 *p++ = '\\', *p++ = 'r';
808 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000809 /* For performance, we don't want to call
810 PyOS_snprintf here (extra layers of
811 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000812 sprintf(p, "\\x%02x", c & 0xff);
813 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814 }
815 else
816 *p++ = c;
817 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000818 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000820 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000821 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000822 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000823 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000824 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825}
826
Guido van Rossum189f1df2001-05-01 16:51:53 +0000827static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000828string_repr(PyObject *op)
829{
830 return PyString_Repr(op, 1);
831}
832
833static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000834string_str(PyObject *s)
835{
Tim Petersc9933152001-10-16 20:18:24 +0000836 assert(PyString_Check(s));
837 if (PyString_CheckExact(s)) {
838 Py_INCREF(s);
839 return s;
840 }
841 else {
842 /* Subtype -- return genuine string with the same value. */
843 PyStringObject *t = (PyStringObject *) s;
844 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
845 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000846}
847
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848static int
Fred Drakeba096332000-07-09 07:04:36 +0000849string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850{
851 return a->ob_size;
852}
853
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000854static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000855string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856{
857 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000858 register PyStringObject *op;
859 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000860#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000861 if (PyUnicode_Check(bb))
862 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000863#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000864 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000865 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000866 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000867 return NULL;
868 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000869#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000871 if ((a->ob_size == 0 || b->ob_size == 0) &&
872 PyString_CheckExact(a) && PyString_CheckExact(b)) {
873 if (a->ob_size == 0) {
874 Py_INCREF(bb);
875 return bb;
876 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000877 Py_INCREF(a);
878 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879 }
880 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000881 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000882 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000883 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000884 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000885 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000886 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000887 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000888 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000889 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
890 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
891 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000892 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000893#undef b
894}
895
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000896static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000897string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000898{
899 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000900 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000901 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000902 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903 if (n < 0)
904 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000905 /* watch out for overflows: the size can overflow int,
906 * and the # of bytes needed can overflow size_t
907 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000908 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000909 if (n && size / n != a->ob_size) {
910 PyErr_SetString(PyExc_OverflowError,
911 "repeated string is too long");
912 return NULL;
913 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000914 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000915 Py_INCREF(a);
916 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917 }
Tim Peters8f422462000-09-09 06:13:41 +0000918 nbytes = size * sizeof(char);
919 if (nbytes / sizeof(char) != (size_t)size ||
920 nbytes + sizeof(PyStringObject) <= nbytes) {
921 PyErr_SetString(PyExc_OverflowError,
922 "repeated string is too long");
923 return NULL;
924 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000926 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000927 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000929 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000930 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000931 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000932 for (i = 0; i < size; i += a->ob_size)
933 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
934 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000935 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936}
937
938/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
939
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000941string_slice(register PyStringObject *a, register int i, register int j)
942 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
944 if (i < 0)
945 i = 0;
946 if (j < 0)
947 j = 0; /* Avoid signed/unsigned bug in next line */
948 if (j > a->ob_size)
949 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000950 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
951 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
955 if (j < i)
956 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000958}
959
Guido van Rossum9284a572000-03-07 15:53:43 +0000960static int
Fred Drakeba096332000-07-09 07:04:36 +0000961string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000962{
Barry Warsaw817918c2002-08-06 16:58:21 +0000963 const char *lhs, *rhs, *end;
964 int size;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000965#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000966 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000967 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000968#endif
Barry Warsaw817918c2002-08-06 16:58:21 +0000969 if (!PyString_Check(el)) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000970 PyErr_SetString(PyExc_TypeError,
Barry Warsaw817918c2002-08-06 16:58:21 +0000971 "'in <string>' requires string as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000972 return -1;
973 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000974 size = PyString_Size(el);
975 rhs = PyString_AS_STRING(el);
976 lhs = PyString_AS_STRING(a);
977
978 /* optimize for a single character */
979 if (size == 1)
980 return memchr(lhs, *rhs, PyString_Size(a)) != NULL;
981
982 end = lhs + (PyString_Size(a) - size);
983 while (lhs <= end) {
984 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +0000985 return 1;
986 }
Barry Warsaw817918c2002-08-06 16:58:21 +0000987
Guido van Rossum9284a572000-03-07 15:53:43 +0000988 return 0;
989}
990
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000992string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000994 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000995 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998 return NULL;
999 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001000 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001001 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001002 if (v == NULL)
1003 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001004 else {
1005#ifdef COUNT_ALLOCS
1006 one_strings++;
1007#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001008 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001009 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001010 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001011}
1012
Martin v. Löwiscd353062001-05-24 16:56:35 +00001013static PyObject*
1014string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001015{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001016 int c;
1017 int len_a, len_b;
1018 int min_len;
1019 PyObject *result;
1020
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001021 /* Make sure both arguments are strings. */
1022 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001023 result = Py_NotImplemented;
1024 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001025 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001026 if (a == b) {
1027 switch (op) {
1028 case Py_EQ:case Py_LE:case Py_GE:
1029 result = Py_True;
1030 goto out;
1031 case Py_NE:case Py_LT:case Py_GT:
1032 result = Py_False;
1033 goto out;
1034 }
1035 }
1036 if (op == Py_EQ) {
1037 /* Supporting Py_NE here as well does not save
1038 much time, since Py_NE is rarely used. */
1039 if (a->ob_size == b->ob_size
1040 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001041 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001042 a->ob_size) == 0)) {
1043 result = Py_True;
1044 } else {
1045 result = Py_False;
1046 }
1047 goto out;
1048 }
1049 len_a = a->ob_size; len_b = b->ob_size;
1050 min_len = (len_a < len_b) ? len_a : len_b;
1051 if (min_len > 0) {
1052 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1053 if (c==0)
1054 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1055 }else
1056 c = 0;
1057 if (c == 0)
1058 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1059 switch (op) {
1060 case Py_LT: c = c < 0; break;
1061 case Py_LE: c = c <= 0; break;
1062 case Py_EQ: assert(0); break; /* unreachable */
1063 case Py_NE: c = c != 0; break;
1064 case Py_GT: c = c > 0; break;
1065 case Py_GE: c = c >= 0; break;
1066 default:
1067 result = Py_NotImplemented;
1068 goto out;
1069 }
1070 result = c ? Py_True : Py_False;
1071 out:
1072 Py_INCREF(result);
1073 return result;
1074}
1075
1076int
1077_PyString_Eq(PyObject *o1, PyObject *o2)
1078{
1079 PyStringObject *a, *b;
1080 a = (PyStringObject*)o1;
1081 b = (PyStringObject*)o2;
1082 return a->ob_size == b->ob_size
1083 && *a->ob_sval == *b->ob_sval
1084 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001085}
1086
Guido van Rossum9bfef441993-03-29 10:43:31 +00001087static long
Fred Drakeba096332000-07-09 07:04:36 +00001088string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001089{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001090 register int len;
1091 register unsigned char *p;
1092 register long x;
1093
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001094 if (a->ob_shash != -1)
1095 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +00001096 if (a->ob_sinterned != NULL)
1097 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001098 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001099 len = a->ob_size;
1100 p = (unsigned char *) a->ob_sval;
1101 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001102 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001103 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001104 x ^= a->ob_size;
1105 if (x == -1)
1106 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001107 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001108 return x;
1109}
1110
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001111static PyObject*
1112string_subscript(PyStringObject* self, PyObject* item)
1113{
1114 if (PyInt_Check(item)) {
1115 long i = PyInt_AS_LONG(item);
1116 if (i < 0)
1117 i += PyString_GET_SIZE(self);
1118 return string_item(self,i);
1119 }
1120 else if (PyLong_Check(item)) {
1121 long i = PyLong_AsLong(item);
1122 if (i == -1 && PyErr_Occurred())
1123 return NULL;
1124 if (i < 0)
1125 i += PyString_GET_SIZE(self);
1126 return string_item(self,i);
1127 }
1128 else if (PySlice_Check(item)) {
1129 int start, stop, step, slicelength, cur, i;
1130 char* source_buf;
1131 char* result_buf;
1132 PyObject* result;
1133
1134 if (PySlice_GetIndicesEx((PySliceObject*)item,
1135 PyString_GET_SIZE(self),
1136 &start, &stop, &step, &slicelength) < 0) {
1137 return NULL;
1138 }
1139
1140 if (slicelength <= 0) {
1141 return PyString_FromStringAndSize("", 0);
1142 }
1143 else {
1144 source_buf = PyString_AsString((PyObject*)self);
1145 result_buf = PyMem_Malloc(slicelength);
1146
1147 for (cur = start, i = 0; i < slicelength;
1148 cur += step, i++) {
1149 result_buf[i] = source_buf[cur];
1150 }
1151
1152 result = PyString_FromStringAndSize(result_buf,
1153 slicelength);
1154 PyMem_Free(result_buf);
1155 return result;
1156 }
1157 }
1158 else {
1159 PyErr_SetString(PyExc_TypeError,
1160 "string indices must be integers");
1161 return NULL;
1162 }
1163}
1164
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001165static int
Fred Drakeba096332000-07-09 07:04:36 +00001166string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001167{
1168 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001169 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001170 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001171 return -1;
1172 }
1173 *ptr = (void *)self->ob_sval;
1174 return self->ob_size;
1175}
1176
1177static int
Fred Drakeba096332000-07-09 07:04:36 +00001178string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001179{
Guido van Rossum045e6881997-09-08 18:30:11 +00001180 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001181 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001182 return -1;
1183}
1184
1185static int
Fred Drakeba096332000-07-09 07:04:36 +00001186string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001187{
1188 if ( lenp )
1189 *lenp = self->ob_size;
1190 return 1;
1191}
1192
Guido van Rossum1db70701998-10-08 02:18:52 +00001193static int
Fred Drakeba096332000-07-09 07:04:36 +00001194string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001195{
1196 if ( index != 0 ) {
1197 PyErr_SetString(PyExc_SystemError,
1198 "accessing non-existent string segment");
1199 return -1;
1200 }
1201 *ptr = self->ob_sval;
1202 return self->ob_size;
1203}
1204
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001205static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001206 (inquiry)string_length, /*sq_length*/
1207 (binaryfunc)string_concat, /*sq_concat*/
1208 (intargfunc)string_repeat, /*sq_repeat*/
1209 (intargfunc)string_item, /*sq_item*/
1210 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001211 0, /*sq_ass_item*/
1212 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001213 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001214};
1215
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001216static PyMappingMethods string_as_mapping = {
1217 (inquiry)string_length,
1218 (binaryfunc)string_subscript,
1219 0,
1220};
1221
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001222static PyBufferProcs string_as_buffer = {
1223 (getreadbufferproc)string_buffer_getreadbuf,
1224 (getwritebufferproc)string_buffer_getwritebuf,
1225 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001226 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001227};
1228
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001229
1230
/* Selector values; each one is also an index into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* Skip the three-character "|O:" prefix, leaving just the name part. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001239
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001240
1241static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001242split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001243{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001244 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001245 PyObject* item;
1246 PyObject *list = PyList_New(0);
1247
1248 if (list == NULL)
1249 return NULL;
1250
Guido van Rossum4c08d552000-03-10 22:55:18 +00001251 for (i = j = 0; i < len; ) {
1252 while (i < len && isspace(Py_CHARMASK(s[i])))
1253 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001254 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001255 while (i < len && !isspace(Py_CHARMASK(s[i])))
1256 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001257 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001258 if (maxsplit-- <= 0)
1259 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1261 if (item == NULL)
1262 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001263 err = PyList_Append(list, item);
1264 Py_DECREF(item);
1265 if (err < 0)
1266 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001267 while (i < len && isspace(Py_CHARMASK(s[i])))
1268 i++;
1269 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001270 }
1271 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001272 if (j < len) {
1273 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1274 if (item == NULL)
1275 goto finally;
1276 err = PyList_Append(list, item);
1277 Py_DECREF(item);
1278 if (err < 0)
1279 goto finally;
1280 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 return list;
1282 finally:
1283 Py_DECREF(list);
1284 return NULL;
1285}
1286
1287
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001288PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289"S.split([sep [,maxsplit]]) -> list of strings\n\
1290\n\
1291Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001293splits are done. If sep is not specified or is None, any\n\
1294whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295
1296static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001297string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001298{
1299 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 int maxsplit = -1;
1301 const char *s = PyString_AS_STRING(self), *sub;
1302 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303
Guido van Rossum4c08d552000-03-10 22:55:18 +00001304 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001306 if (maxsplit < 0)
1307 maxsplit = INT_MAX;
1308 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001310 if (PyString_Check(subobj)) {
1311 sub = PyString_AS_STRING(subobj);
1312 n = PyString_GET_SIZE(subobj);
1313 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001314#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001315 else if (PyUnicode_Check(subobj))
1316 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001317#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001318 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1319 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320 if (n == 0) {
1321 PyErr_SetString(PyExc_ValueError, "empty separator");
1322 return NULL;
1323 }
1324
1325 list = PyList_New(0);
1326 if (list == NULL)
1327 return NULL;
1328
1329 i = j = 0;
1330 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001331 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 if (maxsplit-- <= 0)
1333 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1335 if (item == NULL)
1336 goto fail;
1337 err = PyList_Append(list, item);
1338 Py_DECREF(item);
1339 if (err < 0)
1340 goto fail;
1341 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001342 }
1343 else
1344 i++;
1345 }
1346 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1347 if (item == NULL)
1348 goto fail;
1349 err = PyList_Append(list, item);
1350 Py_DECREF(item);
1351 if (err < 0)
1352 goto fail;
1353
1354 return list;
1355
1356 fail:
1357 Py_DECREF(list);
1358 return NULL;
1359}
1360
1361
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001362PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363"S.join(sequence) -> string\n\
1364\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001365Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001366sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367
1368static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001369string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370{
1371 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001372 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001373 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374 char *p;
1375 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001376 size_t sz = 0;
1377 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001378 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379
Tim Peters19fe14e2001-01-19 03:03:47 +00001380 seq = PySequence_Fast(orig, "");
1381 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001382 if (PyErr_ExceptionMatches(PyExc_TypeError))
1383 PyErr_Format(PyExc_TypeError,
1384 "sequence expected, %.80s found",
1385 orig->ob_type->tp_name);
1386 return NULL;
1387 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001388
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001389 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001390 if (seqlen == 0) {
1391 Py_DECREF(seq);
1392 return PyString_FromString("");
1393 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001394 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001395 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001396 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1397 PyErr_Format(PyExc_TypeError,
1398 "sequence item 0: expected string,"
1399 " %.80s found",
1400 item->ob_type->tp_name);
1401 Py_DECREF(seq);
1402 return NULL;
1403 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001404 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001405 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001406 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001408
Tim Peters19fe14e2001-01-19 03:03:47 +00001409 /* There are at least two things to join. Do a pre-pass to figure out
1410 * the total amount of space we'll need (sz), see whether any argument
1411 * is absurd, and defer to the Unicode join if appropriate.
1412 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001413 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001414 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001415 item = PySequence_Fast_GET_ITEM(seq, i);
1416 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001417#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001418 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001419 /* Defer to Unicode join.
1420 * CAUTION: There's no gurantee that the
1421 * original sequence can be iterated over
1422 * again, so we must pass seq here.
1423 */
1424 PyObject *result;
1425 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001426 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001427 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001428 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001429#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001430 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001431 "sequence item %i: expected string,"
1432 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001433 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001434 Py_DECREF(seq);
1435 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001436 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001437 sz += PyString_GET_SIZE(item);
1438 if (i != 0)
1439 sz += seplen;
1440 if (sz < old_sz || sz > INT_MAX) {
1441 PyErr_SetString(PyExc_OverflowError,
1442 "join() is too long for a Python string");
1443 Py_DECREF(seq);
1444 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001446 }
1447
1448 /* Allocate result space. */
1449 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1450 if (res == NULL) {
1451 Py_DECREF(seq);
1452 return NULL;
1453 }
1454
1455 /* Catenate everything. */
1456 p = PyString_AS_STRING(res);
1457 for (i = 0; i < seqlen; ++i) {
1458 size_t n;
1459 item = PySequence_Fast_GET_ITEM(seq, i);
1460 n = PyString_GET_SIZE(item);
1461 memcpy(p, PyString_AS_STRING(item), n);
1462 p += n;
1463 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001464 memcpy(p, sep, seplen);
1465 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001466 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001468
Jeremy Hylton49048292000-07-11 03:28:17 +00001469 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471}
1472
Tim Peters52e155e2001-06-16 05:42:57 +00001473PyObject *
1474_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001475{
Tim Petersa7259592001-06-16 05:11:17 +00001476 assert(sep != NULL && PyString_Check(sep));
1477 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001478 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001479}
1480
/* Normalize a (start, end) pair in place against a string of length `len'.

   `*end' is capped at `len'; a negative `*end' or `*start' has `len'
   added to it once and is then floored at 0.  Note that `*start' is not
   capped at `len'. */
static void
string_adjust_indices(int *start, int *end, int len)
{
	int hi, lo;

	hi = *end;
	if (hi > len)
		hi = len;
	else if (hi < 0)
		hi += len;
	*end = (hi < 0) ? 0 : hi;

	lo = *start;
	if (lo < 0)
		lo += len;
	*start = (lo < 0) ? 0 : lo;
}
1495
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496static long
Fred Drakeba096332000-07-09 07:04:36 +00001497string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500 int len = PyString_GET_SIZE(self);
1501 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001502 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001504 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001505 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001506 return -2;
1507 if (PyString_Check(subobj)) {
1508 sub = PyString_AS_STRING(subobj);
1509 n = PyString_GET_SIZE(subobj);
1510 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001511#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001512 else if (PyUnicode_Check(subobj))
1513 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001514#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001515 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516 return -2;
1517
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001518 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001519
Guido van Rossum4c08d552000-03-10 22:55:18 +00001520 if (dir > 0) {
1521 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001523 last -= n;
1524 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001525 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001526 return (long)i;
1527 }
1528 else {
1529 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001530
Guido van Rossum4c08d552000-03-10 22:55:18 +00001531 if (n == 0 && i <= last)
1532 return (long)last;
1533 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001534 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535 return (long)j;
1536 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001537
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001538 return -1;
1539}
1540
1541
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001542PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001543"S.find(sub [,start [,end]]) -> int\n\
1544\n\
1545Return the lowest index in S where substring sub is found,\n\
1546such that sub is contained within s[start,end]. Optional\n\
1547arguments start and end are interpreted as in slice notation.\n\
1548\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001549Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001550
1551static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001552string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001554 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555 if (result == -2)
1556 return NULL;
1557 return PyInt_FromLong(result);
1558}
1559
1560
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001561PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001562"S.index(sub [,start [,end]]) -> int\n\
1563\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001564Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001565
1566static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001567string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001568{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570 if (result == -2)
1571 return NULL;
1572 if (result == -1) {
1573 PyErr_SetString(PyExc_ValueError,
1574 "substring not found in string.index");
1575 return NULL;
1576 }
1577 return PyInt_FromLong(result);
1578}
1579
1580
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001581PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582"S.rfind(sub [,start [,end]]) -> int\n\
1583\n\
1584Return the highest index in S where substring sub is found,\n\
1585such that sub is contained within s[start,end]. Optional\n\
1586arguments start and end are interpreted as in slice notation.\n\
1587\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001588Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589
1590static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001591string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001593 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594 if (result == -2)
1595 return NULL;
1596 return PyInt_FromLong(result);
1597}
1598
1599
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001600PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601"S.rindex(sub [,start [,end]]) -> int\n\
1602\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001603Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001604
1605static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001606string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001608 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001609 if (result == -2)
1610 return NULL;
1611 if (result == -1) {
1612 PyErr_SetString(PyExc_ValueError,
1613 "substring not found in string.rindex");
1614 return NULL;
1615 }
1616 return PyInt_FromLong(result);
1617}
1618
1619
1620static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001621do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1622{
1623 char *s = PyString_AS_STRING(self);
1624 int len = PyString_GET_SIZE(self);
1625 char *sep = PyString_AS_STRING(sepobj);
1626 int seplen = PyString_GET_SIZE(sepobj);
1627 int i, j;
1628
1629 i = 0;
1630 if (striptype != RIGHTSTRIP) {
1631 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1632 i++;
1633 }
1634 }
1635
1636 j = len;
1637 if (striptype != LEFTSTRIP) {
1638 do {
1639 j--;
1640 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1641 j++;
1642 }
1643
1644 if (i == 0 && j == len && PyString_CheckExact(self)) {
1645 Py_INCREF(self);
1646 return (PyObject*)self;
1647 }
1648 else
1649 return PyString_FromStringAndSize(s+i, j-i);
1650}
1651
1652
1653static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001654do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001655{
1656 char *s = PyString_AS_STRING(self);
1657 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001659 i = 0;
1660 if (striptype != RIGHTSTRIP) {
1661 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1662 i++;
1663 }
1664 }
1665
1666 j = len;
1667 if (striptype != LEFTSTRIP) {
1668 do {
1669 j--;
1670 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1671 j++;
1672 }
1673
Tim Peters8fa5dd02001-09-12 02:18:30 +00001674 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675 Py_INCREF(self);
1676 return (PyObject*)self;
1677 }
1678 else
1679 return PyString_FromStringAndSize(s+i, j-i);
1680}
1681
1682
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001683static PyObject *
1684do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1685{
1686 PyObject *sep = NULL;
1687
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001688 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001689 return NULL;
1690
1691 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001692 if (PyString_Check(sep))
1693 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001694#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001695 else if (PyUnicode_Check(sep)) {
1696 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1697 PyObject *res;
1698 if (uniself==NULL)
1699 return NULL;
1700 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1701 striptype, sep);
1702 Py_DECREF(uniself);
1703 return res;
1704 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001705#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001706 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001707 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001708#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001709 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001710#else
1711 "%s arg must be None or str",
1712#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001713 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001714 return NULL;
1715 }
1716 return do_xstrip(self, striptype, sep);
1717 }
1718
1719 return do_strip(self, striptype);
1720}
1721
1722
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001723PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001724"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725\n\
1726Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001727whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001728If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001729If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730
1731static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001732string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001734 if (PyTuple_GET_SIZE(args) == 0)
1735 return do_strip(self, BOTHSTRIP); /* Common case */
1736 else
1737 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738}
1739
1740
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001741PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001742"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001744Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001745If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001746If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747
1748static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001749string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001751 if (PyTuple_GET_SIZE(args) == 0)
1752 return do_strip(self, LEFTSTRIP); /* Common case */
1753 else
1754 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755}
1756
1757
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001758PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001759"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001761Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001762If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001763If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764
1765static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001766string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001768 if (PyTuple_GET_SIZE(args) == 0)
1769 return do_strip(self, RIGHTSTRIP); /* Common case */
1770 else
1771 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772}
1773
1774
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776"S.lower() -> string\n\
1777\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001778Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779
1780static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001781string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782{
1783 char *s = PyString_AS_STRING(self), *s_new;
1784 int i, n = PyString_GET_SIZE(self);
1785 PyObject *new;
1786
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787 new = PyString_FromStringAndSize(NULL, n);
1788 if (new == NULL)
1789 return NULL;
1790 s_new = PyString_AsString(new);
1791 for (i = 0; i < n; i++) {
1792 int c = Py_CHARMASK(*s++);
1793 if (isupper(c)) {
1794 *s_new = tolower(c);
1795 } else
1796 *s_new = c;
1797 s_new++;
1798 }
1799 return new;
1800}
1801
1802
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001803PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804"S.upper() -> string\n\
1805\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001806Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807
1808static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001809string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810{
1811 char *s = PyString_AS_STRING(self), *s_new;
1812 int i, n = PyString_GET_SIZE(self);
1813 PyObject *new;
1814
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815 new = PyString_FromStringAndSize(NULL, n);
1816 if (new == NULL)
1817 return NULL;
1818 s_new = PyString_AsString(new);
1819 for (i = 0; i < n; i++) {
1820 int c = Py_CHARMASK(*s++);
1821 if (islower(c)) {
1822 *s_new = toupper(c);
1823 } else
1824 *s_new = c;
1825 s_new++;
1826 }
1827 return new;
1828}
1829
1830
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001831PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001832"S.title() -> string\n\
1833\n\
1834Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001835characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001836
1837static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001838string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001839{
1840 char *s = PyString_AS_STRING(self), *s_new;
1841 int i, n = PyString_GET_SIZE(self);
1842 int previous_is_cased = 0;
1843 PyObject *new;
1844
Guido van Rossum4c08d552000-03-10 22:55:18 +00001845 new = PyString_FromStringAndSize(NULL, n);
1846 if (new == NULL)
1847 return NULL;
1848 s_new = PyString_AsString(new);
1849 for (i = 0; i < n; i++) {
1850 int c = Py_CHARMASK(*s++);
1851 if (islower(c)) {
1852 if (!previous_is_cased)
1853 c = toupper(c);
1854 previous_is_cased = 1;
1855 } else if (isupper(c)) {
1856 if (previous_is_cased)
1857 c = tolower(c);
1858 previous_is_cased = 1;
1859 } else
1860 previous_is_cased = 0;
1861 *s_new++ = c;
1862 }
1863 return new;
1864}
1865
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001866PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867"S.capitalize() -> string\n\
1868\n\
1869Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001870capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001871
1872static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001873string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001874{
1875 char *s = PyString_AS_STRING(self), *s_new;
1876 int i, n = PyString_GET_SIZE(self);
1877 PyObject *new;
1878
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879 new = PyString_FromStringAndSize(NULL, n);
1880 if (new == NULL)
1881 return NULL;
1882 s_new = PyString_AsString(new);
1883 if (0 < n) {
1884 int c = Py_CHARMASK(*s++);
1885 if (islower(c))
1886 *s_new = toupper(c);
1887 else
1888 *s_new = c;
1889 s_new++;
1890 }
1891 for (i = 1; i < n; i++) {
1892 int c = Py_CHARMASK(*s++);
1893 if (isupper(c))
1894 *s_new = tolower(c);
1895 else
1896 *s_new = c;
1897 s_new++;
1898 }
1899 return new;
1900}
1901
1902
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001903PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904"S.count(sub[, start[, end]]) -> int\n\
1905\n\
1906Return the number of occurrences of substring sub in string\n\
1907S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001908interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909
1910static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001911string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001913 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914 int len = PyString_GET_SIZE(self), n;
1915 int i = 0, last = INT_MAX;
1916 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001917 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918
Guido van Rossumc6821402000-05-08 14:08:05 +00001919 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1920 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001922
Guido van Rossum4c08d552000-03-10 22:55:18 +00001923 if (PyString_Check(subobj)) {
1924 sub = PyString_AS_STRING(subobj);
1925 n = PyString_GET_SIZE(subobj);
1926 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001927#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001928 else if (PyUnicode_Check(subobj)) {
1929 int count;
1930 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1931 if (count == -1)
1932 return NULL;
1933 else
1934 return PyInt_FromLong((long) count);
1935 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001936#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001937 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1938 return NULL;
1939
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001940 string_adjust_indices(&i, &last, len);
1941
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 m = last + 1 - n;
1943 if (n == 0)
1944 return PyInt_FromLong((long) (m-i));
1945
1946 r = 0;
1947 while (i < m) {
1948 if (!memcmp(s+i, sub, n)) {
1949 r++;
1950 i += n;
1951 } else {
1952 i++;
1953 }
1954 }
1955 return PyInt_FromLong((long) r);
1956}
1957
1958
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001959PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001960"S.swapcase() -> string\n\
1961\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001963converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964
1965static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001966string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967{
1968 char *s = PyString_AS_STRING(self), *s_new;
1969 int i, n = PyString_GET_SIZE(self);
1970 PyObject *new;
1971
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972 new = PyString_FromStringAndSize(NULL, n);
1973 if (new == NULL)
1974 return NULL;
1975 s_new = PyString_AsString(new);
1976 for (i = 0; i < n; i++) {
1977 int c = Py_CHARMASK(*s++);
1978 if (islower(c)) {
1979 *s_new = toupper(c);
1980 }
1981 else if (isupper(c)) {
1982 *s_new = tolower(c);
1983 }
1984 else
1985 *s_new = c;
1986 s_new++;
1987 }
1988 return new;
1989}
1990
1991
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001992PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993"S.translate(table [,deletechars]) -> string\n\
1994\n\
1995Return a copy of the string S, where all characters occurring\n\
1996in the optional argument deletechars are removed, and the\n\
1997remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001998translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999
2000static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002001string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002002{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002003 register char *input, *output;
2004 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005 register int i, c, changed = 0;
2006 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002007 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008 int inlen, tablen, dellen = 0;
2009 PyObject *result;
2010 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012
Guido van Rossum4c08d552000-03-10 22:55:18 +00002013 if (!PyArg_ParseTuple(args, "O|O:translate",
2014 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002016
2017 if (PyString_Check(tableobj)) {
2018 table1 = PyString_AS_STRING(tableobj);
2019 tablen = PyString_GET_SIZE(tableobj);
2020 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002021#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002022 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002023 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002024 parameter; instead a mapping to None will cause characters
2025 to be deleted. */
2026 if (delobj != NULL) {
2027 PyErr_SetString(PyExc_TypeError,
2028 "deletions are implemented differently for unicode");
2029 return NULL;
2030 }
2031 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2032 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002033#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002034 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002036
2037 if (delobj != NULL) {
2038 if (PyString_Check(delobj)) {
2039 del_table = PyString_AS_STRING(delobj);
2040 dellen = PyString_GET_SIZE(delobj);
2041 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002042#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043 else if (PyUnicode_Check(delobj)) {
2044 PyErr_SetString(PyExc_TypeError,
2045 "deletions are implemented differently for unicode");
2046 return NULL;
2047 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002048#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002049 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2050 return NULL;
2051
2052 if (tablen != 256) {
2053 PyErr_SetString(PyExc_ValueError,
2054 "translation table must be 256 characters long");
2055 return NULL;
2056 }
2057 }
2058 else {
2059 del_table = NULL;
2060 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061 }
2062
2063 table = table1;
2064 inlen = PyString_Size(input_obj);
2065 result = PyString_FromStringAndSize((char *)NULL, inlen);
2066 if (result == NULL)
2067 return NULL;
2068 output_start = output = PyString_AsString(result);
2069 input = PyString_AsString(input_obj);
2070
2071 if (dellen == 0) {
2072 /* If no deletions are required, use faster code */
2073 for (i = inlen; --i >= 0; ) {
2074 c = Py_CHARMASK(*input++);
2075 if (Py_CHARMASK((*output++ = table[c])) != c)
2076 changed = 1;
2077 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002078 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079 return result;
2080 Py_DECREF(result);
2081 Py_INCREF(input_obj);
2082 return input_obj;
2083 }
2084
2085 for (i = 0; i < 256; i++)
2086 trans_table[i] = Py_CHARMASK(table[i]);
2087
2088 for (i = 0; i < dellen; i++)
2089 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2090
2091 for (i = inlen; --i >= 0; ) {
2092 c = Py_CHARMASK(*input++);
2093 if (trans_table[c] != -1)
2094 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2095 continue;
2096 changed = 1;
2097 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002098 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099 Py_DECREF(result);
2100 Py_INCREF(input_obj);
2101 return input_obj;
2102 }
2103 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002104 if (inlen > 0)
2105 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106 return result;
2107}
2108
2109
2110/* What follows is used for implementing replace(). Perry Stoll. */
2111
/*
  mymemfind

  A strstr() work-alike for arbitrary (not NUL-terminated) memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns its index into MEM, or -1 when there is no
  occurrence.  A pattern longer than MEM can never match, so -1 is
  returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot start within the final pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
2137
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming each search just past the previous match.
  E.g. mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also
  gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	for (; len >= 0; hits++) {
		const int at = mymemfind(mem, len, pat, pat_len);
		const int skip = at + pat_len;

		if (at == -1)
			break;
		/* Continue the scan right after this match. */
		mem += skip;
		len -= skip;
	}
	return hits;
}
2161
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If STR is empty, the length of PAT is greater than the length of STR,
   or there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here and returned.

   COUNT limits the number of replacements; a negative COUNT means
   "replace all".

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would be
       too long to be described by an int).

   return value is:
       the new string allocated locally (release with PyMem_FREE), or
       STR itself when out_len is -1, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Only a growing replacement can overflow: reject results whose
	   length would not fit in an int instead of computing a wrapped
	   (too small) size and then writing past the allocation. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2246
2247
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002248PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002249"S.replace (old, new[, maxsplit]) -> string\n\
2250\n\
2251Return a copy of string S with all occurrences of substring\n\
2252old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002253given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254
2255static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002256string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002257{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002258 const char *str = PyString_AS_STRING(self), *sub, *repl;
2259 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002260 const int len = PyString_GET_SIZE(self);
2261 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266 if (!PyArg_ParseTuple(args, "OO|i:replace",
2267 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269
2270 if (PyString_Check(subobj)) {
2271 sub = PyString_AS_STRING(subobj);
2272 sub_len = PyString_GET_SIZE(subobj);
2273 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002274#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002275 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002276 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002277 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002278#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2280 return NULL;
2281
2282 if (PyString_Check(replobj)) {
2283 repl = PyString_AS_STRING(replobj);
2284 repl_len = PyString_GET_SIZE(replobj);
2285 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002286#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002288 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002289 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002290#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002291 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2292 return NULL;
2293
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002294 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002295 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296 return NULL;
2297 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002298 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299 if (new_s == NULL) {
2300 PyErr_NoMemory();
2301 return NULL;
2302 }
2303 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002304 if (PyString_CheckExact(self)) {
2305 /* we're returning another reference to self */
2306 new = (PyObject*)self;
2307 Py_INCREF(new);
2308 }
2309 else {
2310 new = PyString_FromStringAndSize(str, len);
2311 if (new == NULL)
2312 return NULL;
2313 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314 }
2315 else {
2316 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002317 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002318 }
2319 return new;
2320}
2321
2322
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002323PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002324"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002325\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002326Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002328comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329
2330static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002331string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002335 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336 int plen;
2337 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002338 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002339 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340
Guido van Rossumc6821402000-05-08 14:08:05 +00002341 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2342 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002343 return NULL;
2344 if (PyString_Check(subobj)) {
2345 prefix = PyString_AS_STRING(subobj);
2346 plen = PyString_GET_SIZE(subobj);
2347 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002348#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002349 else if (PyUnicode_Check(subobj)) {
2350 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002351 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002352 subobj, start, end, -1);
2353 if (rc == -1)
2354 return NULL;
2355 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002356 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002357 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002358#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 return NULL;
2361
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002362 string_adjust_indices(&start, &end, len);
2363
2364 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002365 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002366
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002367 if (end-start >= plen)
2368 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2369 else
2370 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002371}
2372
2373
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002374PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002375"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002376\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002377Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002379comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380
2381static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002382string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002386 const char* suffix;
2387 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002389 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391
Guido van Rossumc6821402000-05-08 14:08:05 +00002392 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2393 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002394 return NULL;
2395 if (PyString_Check(subobj)) {
2396 suffix = PyString_AS_STRING(subobj);
2397 slen = PyString_GET_SIZE(subobj);
2398 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002399#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002400 else if (PyUnicode_Check(subobj)) {
2401 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002402 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002403 subobj, start, end, +1);
2404 if (rc == -1)
2405 return NULL;
2406 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002407 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002408 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002409#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002410 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411 return NULL;
2412
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002413 string_adjust_indices(&start, &end, len);
2414
2415 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002416 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002417
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002418 if (end-slen > start)
2419 start = end - slen;
2420 if (end-start >= slen)
2421 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2422 else
2423 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424}
2425
2426
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002427PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002428"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002429\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002430Encodes S using the codec registered for encoding. encoding defaults\n\
2431to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002432handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002433a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002434
2435static PyObject *
2436string_encode(PyStringObject *self, PyObject *args)
2437{
2438 char *encoding = NULL;
2439 char *errors = NULL;
2440 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2441 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002442 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2443}
2444
2445
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002446PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002447"S.decode([encoding[,errors]]) -> object\n\
2448\n\
2449Decodes S using the codec registered for encoding. encoding defaults\n\
2450to the default encoding. errors may be given to set a different error\n\
2451handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002452a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002453
2454static PyObject *
2455string_decode(PyStringObject *self, PyObject *args)
2456{
2457 char *encoding = NULL;
2458 char *errors = NULL;
2459 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2460 return NULL;
2461 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002462}
2463
2464
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002465PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002466"S.expandtabs([tabsize]) -> string\n\
2467\n\
2468Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002469If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002470
2471static PyObject*
2472string_expandtabs(PyStringObject *self, PyObject *args)
2473{
2474 const char *e, *p;
2475 char *q;
2476 int i, j;
2477 PyObject *u;
2478 int tabsize = 8;
2479
2480 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2481 return NULL;
2482
Thomas Wouters7e474022000-07-16 12:04:32 +00002483 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002484 i = j = 0;
2485 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2486 for (p = PyString_AS_STRING(self); p < e; p++)
2487 if (*p == '\t') {
2488 if (tabsize > 0)
2489 j += tabsize - (j % tabsize);
2490 }
2491 else {
2492 j++;
2493 if (*p == '\n' || *p == '\r') {
2494 i += j;
2495 j = 0;
2496 }
2497 }
2498
2499 /* Second pass: create output string and fill it */
2500 u = PyString_FromStringAndSize(NULL, i + j);
2501 if (!u)
2502 return NULL;
2503
2504 j = 0;
2505 q = PyString_AS_STRING(u);
2506
2507 for (p = PyString_AS_STRING(self); p < e; p++)
2508 if (*p == '\t') {
2509 if (tabsize > 0) {
2510 i = tabsize - (j % tabsize);
2511 j += i;
2512 while (i--)
2513 *q++ = ' ';
2514 }
2515 }
2516 else {
2517 j++;
2518 *q++ = *p;
2519 if (*p == '\n' || *p == '\r')
2520 j = 0;
2521 }
2522
2523 return u;
2524}
2525
Tim Peters8fa5dd02001-09-12 02:18:30 +00002526static PyObject *
2527pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528{
2529 PyObject *u;
2530
2531 if (left < 0)
2532 left = 0;
2533 if (right < 0)
2534 right = 0;
2535
Tim Peters8fa5dd02001-09-12 02:18:30 +00002536 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002537 Py_INCREF(self);
2538 return (PyObject *)self;
2539 }
2540
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002541 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 left + PyString_GET_SIZE(self) + right);
2543 if (u) {
2544 if (left)
2545 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002546 memcpy(PyString_AS_STRING(u) + left,
2547 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002548 PyString_GET_SIZE(self));
2549 if (right)
2550 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2551 fill, right);
2552 }
2553
2554 return u;
2555}
2556
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002557PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002558"S.ljust(width) -> string\n"
2559"\n"
2560"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002561"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002562
2563static PyObject *
2564string_ljust(PyStringObject *self, PyObject *args)
2565{
2566 int width;
2567 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2568 return NULL;
2569
Tim Peters8fa5dd02001-09-12 02:18:30 +00002570 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002571 Py_INCREF(self);
2572 return (PyObject*) self;
2573 }
2574
2575 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2576}
2577
2578
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002579PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002580"S.rjust(width) -> string\n"
2581"\n"
2582"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002583"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584
2585static PyObject *
2586string_rjust(PyStringObject *self, PyObject *args)
2587{
2588 int width;
2589 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2590 return NULL;
2591
Tim Peters8fa5dd02001-09-12 02:18:30 +00002592 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002593 Py_INCREF(self);
2594 return (PyObject*) self;
2595 }
2596
2597 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2598}
2599
2600
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002601PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002602"S.center(width) -> string\n"
2603"\n"
2604"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002605"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606
2607static PyObject *
2608string_center(PyStringObject *self, PyObject *args)
2609{
2610 int marg, left;
2611 int width;
2612
2613 if (!PyArg_ParseTuple(args, "i:center", &width))
2614 return NULL;
2615
Tim Peters8fa5dd02001-09-12 02:18:30 +00002616 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617 Py_INCREF(self);
2618 return (PyObject*) self;
2619 }
2620
2621 marg = width - PyString_GET_SIZE(self);
2622 left = marg / 2 + (marg & width & 1);
2623
2624 return pad(self, left, marg - left, ' ');
2625}
2626
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002627PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002628"S.zfill(width) -> string\n"
2629"\n"
2630"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002631"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002632
2633static PyObject *
2634string_zfill(PyStringObject *self, PyObject *args)
2635{
2636 int fill;
2637 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002638 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002639
2640 int width;
2641 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2642 return NULL;
2643
2644 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002645 if (PyString_CheckExact(self)) {
2646 Py_INCREF(self);
2647 return (PyObject*) self;
2648 }
2649 else
2650 return PyString_FromStringAndSize(
2651 PyString_AS_STRING(self),
2652 PyString_GET_SIZE(self)
2653 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002654 }
2655
2656 fill = width - PyString_GET_SIZE(self);
2657
2658 s = pad(self, fill, 0, '0');
2659
2660 if (s == NULL)
2661 return NULL;
2662
2663 p = PyString_AS_STRING(s);
2664 if (p[fill] == '+' || p[fill] == '-') {
2665 /* move sign to beginning of string */
2666 p[0] = p[fill];
2667 p[fill] = '0';
2668 }
2669
2670 return (PyObject*) s;
2671}
2672
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002673PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002674"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002675"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002676"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002677"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002678
2679static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002680string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002681{
Fred Drakeba096332000-07-09 07:04:36 +00002682 register const unsigned char *p
2683 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002684 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002685
Guido van Rossum4c08d552000-03-10 22:55:18 +00002686 /* Shortcut for single character strings */
2687 if (PyString_GET_SIZE(self) == 1 &&
2688 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002689 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002690
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002691 /* Special case for empty strings */
2692 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002693 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002694
Guido van Rossum4c08d552000-03-10 22:55:18 +00002695 e = p + PyString_GET_SIZE(self);
2696 for (; p < e; p++) {
2697 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002698 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002699 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002700 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002701}
2702
2703
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002704PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002705"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002706\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002707Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002708and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002709
2710static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002711string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002712{
Fred Drakeba096332000-07-09 07:04:36 +00002713 register const unsigned char *p
2714 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002715 register const unsigned char *e;
2716
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002717 /* Shortcut for single character strings */
2718 if (PyString_GET_SIZE(self) == 1 &&
2719 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002720 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002721
2722 /* Special case for empty strings */
2723 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002724 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002725
2726 e = p + PyString_GET_SIZE(self);
2727 for (; p < e; p++) {
2728 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002729 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002730 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002731 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002732}
2733
2734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002735PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002736"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002737\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002738Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002739and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002740
2741static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002742string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002743{
Fred Drakeba096332000-07-09 07:04:36 +00002744 register const unsigned char *p
2745 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002746 register const unsigned char *e;
2747
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002748 /* Shortcut for single character strings */
2749 if (PyString_GET_SIZE(self) == 1 &&
2750 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002751 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002752
2753 /* Special case for empty strings */
2754 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002755 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002756
2757 e = p + PyString_GET_SIZE(self);
2758 for (; p < e; p++) {
2759 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002760 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002761 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002762 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002763}
2764
2765
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002766PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002767"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002768\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002769Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002770False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002771
2772static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002773string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002774{
Fred Drakeba096332000-07-09 07:04:36 +00002775 register const unsigned char *p
2776 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002777 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002778
Guido van Rossum4c08d552000-03-10 22:55:18 +00002779 /* Shortcut for single character strings */
2780 if (PyString_GET_SIZE(self) == 1 &&
2781 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002782 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002783
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002784 /* Special case for empty strings */
2785 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002786 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002787
Guido van Rossum4c08d552000-03-10 22:55:18 +00002788 e = p + PyString_GET_SIZE(self);
2789 for (; p < e; p++) {
2790 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002791 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002792 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002793 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002794}
2795
2796
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002797PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002798"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002799\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002800Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002801at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002802
2803static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002804string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002805{
Fred Drakeba096332000-07-09 07:04:36 +00002806 register const unsigned char *p
2807 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002808 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002809 int cased;
2810
Guido van Rossum4c08d552000-03-10 22:55:18 +00002811 /* Shortcut for single character strings */
2812 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002813 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002814
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002815 /* Special case for empty strings */
2816 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002817 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002818
Guido van Rossum4c08d552000-03-10 22:55:18 +00002819 e = p + PyString_GET_SIZE(self);
2820 cased = 0;
2821 for (; p < e; p++) {
2822 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002823 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002824 else if (!cased && islower(*p))
2825 cased = 1;
2826 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002827 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002828}
2829
2830
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002831PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002832"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002833\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002834Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002835at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002836
2837static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002838string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002839{
Fred Drakeba096332000-07-09 07:04:36 +00002840 register const unsigned char *p
2841 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002842 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002843 int cased;
2844
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845 /* Shortcut for single character strings */
2846 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002847 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002848
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002849 /* Special case for empty strings */
2850 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002851 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002852
Guido van Rossum4c08d552000-03-10 22:55:18 +00002853 e = p + PyString_GET_SIZE(self);
2854 cased = 0;
2855 for (; p < e; p++) {
2856 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002857 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002858 else if (!cased && isupper(*p))
2859 cased = 1;
2860 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002861 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002862}
2863
2864
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002865PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002866"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002868Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002870ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871
2872static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002873string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002874{
Fred Drakeba096332000-07-09 07:04:36 +00002875 register const unsigned char *p
2876 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002877 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002878 int cased, previous_is_cased;
2879
Guido van Rossum4c08d552000-03-10 22:55:18 +00002880 /* Shortcut for single character strings */
2881 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002882 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002883
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002884 /* Special case for empty strings */
2885 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002886 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002887
Guido van Rossum4c08d552000-03-10 22:55:18 +00002888 e = p + PyString_GET_SIZE(self);
2889 cased = 0;
2890 previous_is_cased = 0;
2891 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002892 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893
2894 if (isupper(ch)) {
2895 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002896 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002897 previous_is_cased = 1;
2898 cased = 1;
2899 }
2900 else if (islower(ch)) {
2901 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002902 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002903 previous_is_cased = 1;
2904 cased = 1;
2905 }
2906 else
2907 previous_is_cased = 0;
2908 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002909 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002910}
2911
2912
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002913PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002914"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002915\n\
2916Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002917Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002918is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002919
2920#define SPLIT_APPEND(data, left, right) \
2921 str = PyString_FromStringAndSize(data + left, right - left); \
2922 if (!str) \
2923 goto onError; \
2924 if (PyList_Append(list, str)) { \
2925 Py_DECREF(str); \
2926 goto onError; \
2927 } \
2928 else \
2929 Py_DECREF(str);
2930
2931static PyObject*
2932string_splitlines(PyStringObject *self, PyObject *args)
2933{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002934 register int i;
2935 register int j;
2936 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002937 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002938 PyObject *list;
2939 PyObject *str;
2940 char *data;
2941
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002942 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002943 return NULL;
2944
2945 data = PyString_AS_STRING(self);
2946 len = PyString_GET_SIZE(self);
2947
Guido van Rossum4c08d552000-03-10 22:55:18 +00002948 list = PyList_New(0);
2949 if (!list)
2950 goto onError;
2951
2952 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002953 int eol;
2954
Guido van Rossum4c08d552000-03-10 22:55:18 +00002955 /* Find a line and append it */
2956 while (i < len && data[i] != '\n' && data[i] != '\r')
2957 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002958
2959 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002960 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002961 if (i < len) {
2962 if (data[i] == '\r' && i + 1 < len &&
2963 data[i+1] == '\n')
2964 i += 2;
2965 else
2966 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002967 if (keepends)
2968 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002969 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002970 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002971 j = i;
2972 }
2973 if (j < len) {
2974 SPLIT_APPEND(data, j, len);
2975 }
2976
2977 return list;
2978
2979 onError:
2980 Py_DECREF(list);
2981 return NULL;
2982}
2983
2984#undef SPLIT_APPEND
2985
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002986
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002987static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002988string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002989 /* Counterparts of the obsolete stropmodule functions; except
2990 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002991 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2992 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2993 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2994 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002995 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2996 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2997 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2998 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2999 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3000 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3001 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003002 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3003 capitalize__doc__},
3004 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3005 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3006 endswith__doc__},
3007 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3008 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3009 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3010 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3011 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3012 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3013 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3014 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3015 startswith__doc__},
3016 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3017 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3018 swapcase__doc__},
3019 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3020 translate__doc__},
3021 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3022 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3023 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3024 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3025 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3026 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3027 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3028 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3029 expandtabs__doc__},
3030 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3031 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003032 {NULL, NULL} /* sentinel */
3033};
3034
Jeremy Hylton938ace62002-07-17 16:30:39 +00003035static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003036str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3037
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003038static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003039string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003040{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003041 PyObject *x = NULL;
3042 static char *kwlist[] = {"object", 0};
3043
Guido van Rossumae960af2001-08-30 03:11:59 +00003044 if (type != &PyString_Type)
3045 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003046 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3047 return NULL;
3048 if (x == NULL)
3049 return PyString_FromString("");
3050 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003051}
3052
Guido van Rossumae960af2001-08-30 03:11:59 +00003053static PyObject *
3054str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3055{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003056 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003057 int n;
3058
3059 assert(PyType_IsSubtype(type, &PyString_Type));
3060 tmp = string_new(&PyString_Type, args, kwds);
3061 if (tmp == NULL)
3062 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003063 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003064 n = PyString_GET_SIZE(tmp);
3065 pnew = type->tp_alloc(type, n);
3066 if (pnew != NULL) {
3067 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003068 ((PyStringObject *)pnew)->ob_shash =
3069 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003070 ((PyStringObject *)pnew)->ob_sinterned =
3071 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003072 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003073 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003074 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003075}
3076
Guido van Rossumcacfc072002-05-24 19:01:59 +00003077static PyObject *
3078basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3079{
3080 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003081 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003082 return NULL;
3083}
3084
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003085PyDoc_STRVAR(basestring_doc,
3086"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003087
3088PyTypeObject PyBaseString_Type = {
3089 PyObject_HEAD_INIT(&PyType_Type)
3090 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003091 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003092 0,
3093 0,
3094 0, /* tp_dealloc */
3095 0, /* tp_print */
3096 0, /* tp_getattr */
3097 0, /* tp_setattr */
3098 0, /* tp_compare */
3099 0, /* tp_repr */
3100 0, /* tp_as_number */
3101 0, /* tp_as_sequence */
3102 0, /* tp_as_mapping */
3103 0, /* tp_hash */
3104 0, /* tp_call */
3105 0, /* tp_str */
3106 0, /* tp_getattro */
3107 0, /* tp_setattro */
3108 0, /* tp_as_buffer */
3109 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3110 basestring_doc, /* tp_doc */
3111 0, /* tp_traverse */
3112 0, /* tp_clear */
3113 0, /* tp_richcompare */
3114 0, /* tp_weaklistoffset */
3115 0, /* tp_iter */
3116 0, /* tp_iternext */
3117 0, /* tp_methods */
3118 0, /* tp_members */
3119 0, /* tp_getset */
3120 &PyBaseObject_Type, /* tp_base */
3121 0, /* tp_dict */
3122 0, /* tp_descr_get */
3123 0, /* tp_descr_set */
3124 0, /* tp_dictoffset */
3125 0, /* tp_init */
3126 0, /* tp_alloc */
3127 basestring_new, /* tp_new */
3128 0, /* tp_free */
3129};
3130
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003131PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003132"str(object) -> string\n\
3133\n\
3134Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003135If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003136
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003137PyTypeObject PyString_Type = {
3138 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003139 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003140 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003141 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003142 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003143 (destructor)string_dealloc, /* tp_dealloc */
3144 (printfunc)string_print, /* tp_print */
3145 0, /* tp_getattr */
3146 0, /* tp_setattr */
3147 0, /* tp_compare */
3148 (reprfunc)string_repr, /* tp_repr */
3149 0, /* tp_as_number */
3150 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003151 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003152 (hashfunc)string_hash, /* tp_hash */
3153 0, /* tp_call */
3154 (reprfunc)string_str, /* tp_str */
3155 PyObject_GenericGetAttr, /* tp_getattro */
3156 0, /* tp_setattro */
3157 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00003158 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003159 string_doc, /* tp_doc */
3160 0, /* tp_traverse */
3161 0, /* tp_clear */
3162 (richcmpfunc)string_richcompare, /* tp_richcompare */
3163 0, /* tp_weaklistoffset */
3164 0, /* tp_iter */
3165 0, /* tp_iternext */
3166 string_methods, /* tp_methods */
3167 0, /* tp_members */
3168 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003169 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003170 0, /* tp_dict */
3171 0, /* tp_descr_get */
3172 0, /* tp_descr_set */
3173 0, /* tp_dictoffset */
3174 0, /* tp_init */
3175 0, /* tp_alloc */
3176 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003177 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003178};
3179
3180void
Fred Drakeba096332000-07-09 07:04:36 +00003181PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003182{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003183 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003184 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003185 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003186 if (w == NULL || !PyString_Check(*pv)) {
3187 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003188 *pv = NULL;
3189 return;
3190 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003191 v = string_concat((PyStringObject *) *pv, w);
3192 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003193 *pv = v;
3194}
3195
Guido van Rossum013142a1994-08-30 08:19:36 +00003196void
Fred Drakeba096332000-07-09 07:04:36 +00003197PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003198{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003199 PyString_Concat(pv, w);
3200 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003201}
3202
3203
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003204/* The following function breaks the notion that strings are immutable:
3205 it changes the size of a string. We get away with this only if there
3206 is only one module referencing the object. You can also think of it
3207 as creating a new string object and destroying the old one, only
3208 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003209 already be known to some other part of the code...
3210 Note that if there's not enough memory to resize the string, the original
3211 string object at *pv is deallocated, *pv is set to NULL, an "out of
3212 memory" exception is set, and -1 is returned. Else (on success) 0 is
3213 returned, and the value in *pv may or may not be the same as on input.
3214 As always, an extra byte is allocated for a trailing \0 byte (newsize
3215 does *not* include that), and a trailing \0 byte is stored.
3216*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003217
3218int
Fred Drakeba096332000-07-09 07:04:36 +00003219_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003220{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003221 register PyObject *v;
3222 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003223 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003224 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003225 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003226 Py_DECREF(v);
3227 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003228 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003229 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003230 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003231 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003232 _Py_ForgetReference(v);
3233 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003234 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003235 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003236 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003237 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003238 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003239 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003240 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003241 _Py_NewReference(*pv);
3242 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003243 sv->ob_size = newsize;
3244 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003245 return 0;
3246}
Guido van Rossume5372401993-03-16 12:15:04 +00003247
3248/* Helpers for formatstring */
3249
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003250static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003251getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003252{
3253 int argidx = *p_argidx;
3254 if (argidx < arglen) {
3255 (*p_argidx)++;
3256 if (arglen < 0)
3257 return args;
3258 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003259 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003260 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003261 PyErr_SetString(PyExc_TypeError,
3262 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003263 return NULL;
3264}
3265
/* Bit flags recording which flag characters appeared in a format
 * specifier.  Each flag occupies its own bit so they can be OR-ed
 * together in a single int.
 */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */
3278
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003279static int
Fred Drakeba096332000-07-09 07:04:36 +00003280formatfloat(char *buf, size_t buflen, int flags,
3281 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003282{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003283 /* fmt = '%#.' + `prec` + `type`
3284 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003285 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003286 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003287 x = PyFloat_AsDouble(v);
3288 if (x == -1.0 && PyErr_Occurred()) {
3289 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003290 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003291 }
Guido van Rossume5372401993-03-16 12:15:04 +00003292 if (prec < 0)
3293 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003294 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3295 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003296 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3297 (flags&F_ALT) ? "#" : "",
3298 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003299 /* worst case length calc to ensure no buffer overrun:
3300 fmt = %#.<prec>g
3301 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003302 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003303 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3304 If prec=0 the effective precision is 1 (the leading digit is
3305 always given), therefore increase by one to 10+prec. */
3306 if (buflen <= (size_t)10 + (size_t)prec) {
3307 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003308 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003309 return -1;
3310 }
Tim Peters885d4572001-11-28 20:27:42 +00003311 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003312 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003313}
3314
Tim Peters38fd5b62000-09-21 05:43:11 +00003315/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3316 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3317 * Python's regular ints.
3318 * Return value: a new PyString*, or NULL if error.
3319 * . *pbuf is set to point into it,
3320 * *plen set to the # of chars following that.
3321 * Caller must decref it when done using pbuf.
3322 * The string starting at *pbuf is of the form
3323 * "-"? ("0x" | "0X")? digit+
3324 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003325 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003326 * There will be at least prec digits, zero-filled on the left if
3327 * necessary to get that many.
3328 * val object to be converted
3329 * flags bitmask of format flags; only F_ALT is looked at
3330 * prec minimum number of digits; 0-fill on left if needed
3331 * type a character in [duoxX]; u acts the same as d
3332 *
3333 * CAUTION: o, x and X conversions on regular ints can never
3334 * produce a '-' sign, but can for Python's unbounded ints.
3335 */
3336PyObject*
3337_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3338 char **pbuf, int *plen)
3339{
3340 PyObject *result = NULL;
3341 char *buf;
3342 int i;
3343 int sign; /* 1 if '-', else 0 */
3344 int len; /* number of characters */
3345 int numdigits; /* len == numnondigits + numdigits */
3346 int numnondigits = 0;
3347
3348 switch (type) {
3349 case 'd':
3350 case 'u':
3351 result = val->ob_type->tp_str(val);
3352 break;
3353 case 'o':
3354 result = val->ob_type->tp_as_number->nb_oct(val);
3355 break;
3356 case 'x':
3357 case 'X':
3358 numnondigits = 2;
3359 result = val->ob_type->tp_as_number->nb_hex(val);
3360 break;
3361 default:
3362 assert(!"'type' not in [duoxX]");
3363 }
3364 if (!result)
3365 return NULL;
3366
3367 /* To modify the string in-place, there can only be one reference. */
3368 if (result->ob_refcnt != 1) {
3369 PyErr_BadInternalCall();
3370 return NULL;
3371 }
3372 buf = PyString_AsString(result);
3373 len = PyString_Size(result);
3374 if (buf[len-1] == 'L') {
3375 --len;
3376 buf[len] = '\0';
3377 }
3378 sign = buf[0] == '-';
3379 numnondigits += sign;
3380 numdigits = len - numnondigits;
3381 assert(numdigits > 0);
3382
Tim Petersfff53252001-04-12 18:38:48 +00003383 /* Get rid of base marker unless F_ALT */
3384 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003385 /* Need to skip 0x, 0X or 0. */
3386 int skipped = 0;
3387 switch (type) {
3388 case 'o':
3389 assert(buf[sign] == '0');
3390 /* If 0 is only digit, leave it alone. */
3391 if (numdigits > 1) {
3392 skipped = 1;
3393 --numdigits;
3394 }
3395 break;
3396 case 'x':
3397 case 'X':
3398 assert(buf[sign] == '0');
3399 assert(buf[sign + 1] == 'x');
3400 skipped = 2;
3401 numnondigits -= 2;
3402 break;
3403 }
3404 if (skipped) {
3405 buf += skipped;
3406 len -= skipped;
3407 if (sign)
3408 buf[0] = '-';
3409 }
3410 assert(len == numnondigits + numdigits);
3411 assert(numdigits > 0);
3412 }
3413
3414 /* Fill with leading zeroes to meet minimum width. */
3415 if (prec > numdigits) {
3416 PyObject *r1 = PyString_FromStringAndSize(NULL,
3417 numnondigits + prec);
3418 char *b1;
3419 if (!r1) {
3420 Py_DECREF(result);
3421 return NULL;
3422 }
3423 b1 = PyString_AS_STRING(r1);
3424 for (i = 0; i < numnondigits; ++i)
3425 *b1++ = *buf++;
3426 for (i = 0; i < prec - numdigits; i++)
3427 *b1++ = '0';
3428 for (i = 0; i < numdigits; i++)
3429 *b1++ = *buf++;
3430 *b1 = '\0';
3431 Py_DECREF(result);
3432 result = r1;
3433 buf = PyString_AS_STRING(result);
3434 len = numnondigits + prec;
3435 }
3436
3437 /* Fix up case for hex conversions. */
3438 switch (type) {
3439 case 'x':
3440 /* Need to convert all upper case letters to lower case. */
3441 for (i = 0; i < len; i++)
3442 if (buf[i] >= 'A' && buf[i] <= 'F')
3443 buf[i] += 'a'-'A';
3444 break;
3445 case 'X':
3446 /* Need to convert 0x to 0X (and -0x to -0X). */
3447 if (buf[sign + 1] == 'x')
3448 buf[sign + 1] = 'X';
3449 break;
3450 }
3451 *pbuf = buf;
3452 *plen = len;
3453 return result;
3454}
3455
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003456static int
Fred Drakeba096332000-07-09 07:04:36 +00003457formatint(char *buf, size_t buflen, int flags,
3458 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003459{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003460 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003461 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3462 + 1 + 1 = 24 */
3463 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003464 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003465
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003466 x = PyInt_AsLong(v);
3467 if (x == -1 && PyErr_Occurred()) {
3468 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003469 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003470 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003471 if (x < 0 && type != 'd' && type != 'i') {
3472 if (PyErr_Warn(PyExc_DeprecationWarning,
3473 "%u/%o/%x/%X of negative int will return "
3474 "a signed string in Python 2.4 and up") < 0)
3475 return -1;
3476 }
Guido van Rossume5372401993-03-16 12:15:04 +00003477 if (prec < 0)
3478 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003479
3480 if ((flags & F_ALT) &&
3481 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003482 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003483 * of issues that cause pain:
3484 * - when 0 is being converted, the C standard leaves off
3485 * the '0x' or '0X', which is inconsistent with other
3486 * %#x/%#X conversions and inconsistent with Python's
3487 * hex() function
3488 * - there are platforms that violate the standard and
3489 * convert 0 with the '0x' or '0X'
3490 * (Metrowerks, Compaq Tru64)
3491 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003492 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003493 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003494 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003495 * We can achieve the desired consistency by inserting our
3496 * own '0x' or '0X' prefix, and substituting %x/%X in place
3497 * of %#x/%#X.
3498 *
3499 * Note that this is the same approach as used in
3500 * formatint() in unicodeobject.c
3501 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003502 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003503 type, prec, type);
3504 }
3505 else {
3506 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003507 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003508 prec, type);
3509 }
3510
Tim Peters38fd5b62000-09-21 05:43:11 +00003511 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003512 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3513 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003514 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003515 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003516 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003517 return -1;
3518 }
Tim Peters885d4572001-11-28 20:27:42 +00003519 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003520 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003521}
3522
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003523static int
Fred Drakeba096332000-07-09 07:04:36 +00003524formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003525{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003526 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003527 if (PyString_Check(v)) {
3528 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003529 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003530 }
3531 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003532 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003533 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003534 }
3535 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003536 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003537}
3538
Guido van Rossum013142a1994-08-30 08:19:36 +00003539
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003549
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003550PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003551PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003552{
3553 char *fmt, *res;
3554 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003555 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003556 PyObject *result, *orig_args;
3557#ifdef Py_USING_UNICODE
3558 PyObject *v, *w;
3559#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003560 PyObject *dict = NULL;
3561 if (format == NULL || !PyString_Check(format) || args == NULL) {
3562 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003563 return NULL;
3564 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003565 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003566 fmt = PyString_AS_STRING(format);
3567 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003568 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003569 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003570 if (result == NULL)
3571 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003572 res = PyString_AsString(result);
3573 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003574 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003575 argidx = 0;
3576 }
3577 else {
3578 arglen = -1;
3579 argidx = -2;
3580 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003581 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003582 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003583 while (--fmtcnt >= 0) {
3584 if (*fmt != '%') {
3585 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003586 rescnt = fmtcnt + 100;
3587 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003588 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003589 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003590 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003591 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003592 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003593 }
3594 *res++ = *fmt++;
3595 }
3596 else {
3597 /* Got a format specifier */
3598 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003599 int width = -1;
3600 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003601 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003602 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003603 PyObject *v = NULL;
3604 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003605 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003606 int sign;
3607 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003608 char formatbuf[FORMATBUFLEN];
3609 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003610#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003611 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003612 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003613#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003614
Guido van Rossumda9c2711996-12-05 21:58:58 +00003615 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003616 if (*fmt == '(') {
3617 char *keystart;
3618 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003619 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003620 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003621
3622 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003623 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003624 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003625 goto error;
3626 }
3627 ++fmt;
3628 --fmtcnt;
3629 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003630 /* Skip over balanced parentheses */
3631 while (pcount > 0 && --fmtcnt >= 0) {
3632 if (*fmt == ')')
3633 --pcount;
3634 else if (*fmt == '(')
3635 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003636 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003637 }
3638 keylen = fmt - keystart - 1;
3639 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003640 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003641 "incomplete format key");
3642 goto error;
3643 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003644 key = PyString_FromStringAndSize(keystart,
3645 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003646 if (key == NULL)
3647 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003648 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003649 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003650 args_owned = 0;
3651 }
3652 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003653 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003654 if (args == NULL) {
3655 goto error;
3656 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003657 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003658 arglen = -1;
3659 argidx = -2;
3660 }
Guido van Rossume5372401993-03-16 12:15:04 +00003661 while (--fmtcnt >= 0) {
3662 switch (c = *fmt++) {
3663 case '-': flags |= F_LJUST; continue;
3664 case '+': flags |= F_SIGN; continue;
3665 case ' ': flags |= F_BLANK; continue;
3666 case '#': flags |= F_ALT; continue;
3667 case '0': flags |= F_ZERO; continue;
3668 }
3669 break;
3670 }
3671 if (c == '*') {
3672 v = getnextarg(args, arglen, &argidx);
3673 if (v == NULL)
3674 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003675 if (!PyInt_Check(v)) {
3676 PyErr_SetString(PyExc_TypeError,
3677 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003678 goto error;
3679 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003680 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003681 if (width < 0) {
3682 flags |= F_LJUST;
3683 width = -width;
3684 }
Guido van Rossume5372401993-03-16 12:15:04 +00003685 if (--fmtcnt >= 0)
3686 c = *fmt++;
3687 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003688 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003689 width = c - '0';
3690 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003691 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003692 if (!isdigit(c))
3693 break;
3694 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003695 PyErr_SetString(
3696 PyExc_ValueError,
3697 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003698 goto error;
3699 }
3700 width = width*10 + (c - '0');
3701 }
3702 }
3703 if (c == '.') {
3704 prec = 0;
3705 if (--fmtcnt >= 0)
3706 c = *fmt++;
3707 if (c == '*') {
3708 v = getnextarg(args, arglen, &argidx);
3709 if (v == NULL)
3710 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003711 if (!PyInt_Check(v)) {
3712 PyErr_SetString(
3713 PyExc_TypeError,
3714 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003715 goto error;
3716 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003717 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003718 if (prec < 0)
3719 prec = 0;
3720 if (--fmtcnt >= 0)
3721 c = *fmt++;
3722 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003723 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003724 prec = c - '0';
3725 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003726 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003727 if (!isdigit(c))
3728 break;
3729 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003730 PyErr_SetString(
3731 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003732 "prec too big");
3733 goto error;
3734 }
3735 prec = prec*10 + (c - '0');
3736 }
3737 }
3738 } /* prec */
3739 if (fmtcnt >= 0) {
3740 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003741 if (--fmtcnt >= 0)
3742 c = *fmt++;
3743 }
3744 }
3745 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003746 PyErr_SetString(PyExc_ValueError,
3747 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003748 goto error;
3749 }
3750 if (c != '%') {
3751 v = getnextarg(args, arglen, &argidx);
3752 if (v == NULL)
3753 goto error;
3754 }
3755 sign = 0;
3756 fill = ' ';
3757 switch (c) {
3758 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003759 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003760 len = 1;
3761 break;
3762 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003763 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003764#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003765 if (PyUnicode_Check(v)) {
3766 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003767 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003768 goto unicode;
3769 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003770#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003771 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003772 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003773 else
3774 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003775 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003776 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003777 if (!PyString_Check(temp)) {
3778 PyErr_SetString(PyExc_TypeError,
3779 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003780 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003781 goto error;
3782 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003783 pbuf = PyString_AS_STRING(temp);
3784 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003785 if (prec >= 0 && len > prec)
3786 len = prec;
3787 break;
3788 case 'i':
3789 case 'd':
3790 case 'u':
3791 case 'o':
3792 case 'x':
3793 case 'X':
3794 if (c == 'i')
3795 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003796 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003797 temp = _PyString_FormatLong(v, flags,
3798 prec, c, &pbuf, &len);
3799 if (!temp)
3800 goto error;
3801 /* unbounded ints can always produce
3802 a sign character! */
3803 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003804 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003805 else {
3806 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003807 len = formatint(pbuf,
3808 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003809 flags, prec, c, v);
3810 if (len < 0)
3811 goto error;
3812 /* only d conversion is signed */
3813 sign = c == 'd';
3814 }
3815 if (flags & F_ZERO)
3816 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003817 break;
3818 case 'e':
3819 case 'E':
3820 case 'f':
3821 case 'g':
3822 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003823 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003824 len = formatfloat(pbuf, sizeof(formatbuf),
3825 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003826 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003827 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003828 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003829 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003830 fill = '0';
3831 break;
3832 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003833 pbuf = formatbuf;
3834 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003835 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003836 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003837 break;
3838 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003839 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003840 "unsupported format character '%c' (0x%x) "
3841 "at index %i",
3842 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003843 goto error;
3844 }
3845 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003846 if (*pbuf == '-' || *pbuf == '+') {
3847 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003848 len--;
3849 }
3850 else if (flags & F_SIGN)
3851 sign = '+';
3852 else if (flags & F_BLANK)
3853 sign = ' ';
3854 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003855 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003856 }
3857 if (width < len)
3858 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003859 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003860 reslen -= rescnt;
3861 rescnt = width + fmtcnt + 100;
3862 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003863 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003864 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003865 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003866 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003867 }
3868 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003869 if (fill != ' ')
3870 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003871 rescnt--;
3872 if (width > len)
3873 width--;
3874 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003875 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3876 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003877 assert(pbuf[1] == c);
3878 if (fill != ' ') {
3879 *res++ = *pbuf++;
3880 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003881 }
Tim Petersfff53252001-04-12 18:38:48 +00003882 rescnt -= 2;
3883 width -= 2;
3884 if (width < 0)
3885 width = 0;
3886 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003887 }
3888 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003889 do {
3890 --rescnt;
3891 *res++ = fill;
3892 } while (--width > len);
3893 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003894 if (fill == ' ') {
3895 if (sign)
3896 *res++ = sign;
3897 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003898 (c == 'x' || c == 'X')) {
3899 assert(pbuf[0] == '0');
3900 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003901 *res++ = *pbuf++;
3902 *res++ = *pbuf++;
3903 }
3904 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003905 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003906 res += len;
3907 rescnt -= len;
3908 while (--width >= len) {
3909 --rescnt;
3910 *res++ = ' ';
3911 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003912 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003913 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003914 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003915 goto error;
3916 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003917 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003918 } /* '%' */
3919 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003920 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003921 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003922 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003923 goto error;
3924 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003925 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003926 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003927 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003928 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003929 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003930
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003931#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003932 unicode:
3933 if (args_owned) {
3934 Py_DECREF(args);
3935 args_owned = 0;
3936 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003937 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003938 if (PyTuple_Check(orig_args) && argidx > 0) {
3939 PyObject *v;
3940 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3941 v = PyTuple_New(n);
3942 if (v == NULL)
3943 goto error;
3944 while (--n >= 0) {
3945 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3946 Py_INCREF(w);
3947 PyTuple_SET_ITEM(v, n, w);
3948 }
3949 args = v;
3950 } else {
3951 Py_INCREF(orig_args);
3952 args = orig_args;
3953 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003954 args_owned = 1;
3955 /* Take what we have of the result and let the Unicode formatting
3956 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003957 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003958 if (_PyString_Resize(&result, rescnt))
3959 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003960 fmtcnt = PyString_GET_SIZE(format) - \
3961 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003962 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3963 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003964 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003965 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003966 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003967 if (v == NULL)
3968 goto error;
3969 /* Paste what we have (result) to what the Unicode formatting
3970 function returned (v) and return the result (or error) */
3971 w = PyUnicode_Concat(result, v);
3972 Py_DECREF(result);
3973 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003974 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003975 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003976#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003977
Guido van Rossume5372401993-03-16 12:15:04 +00003978 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003979 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003980 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003981 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003982 }
Guido van Rossume5372401993-03-16 12:15:04 +00003983 return NULL;
3984}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003985
3986
Guido van Rossum2a61e741997-01-18 07:55:05 +00003987
/* This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004000static PyObject *interned;
4001
4002void
Fred Drakeba096332000-07-09 07:04:36 +00004003PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004004{
4005 register PyStringObject *s = (PyStringObject *)(*p);
4006 PyObject *t;
4007 if (s == NULL || !PyString_Check(s))
4008 Py_FatalError("PyString_InternInPlace: strings only please!");
4009 if ((t = s->ob_sinterned) != NULL) {
4010 if (t == (PyObject *)s)
4011 return;
4012 Py_INCREF(t);
4013 *p = t;
4014 Py_DECREF(s);
4015 return;
4016 }
4017 if (interned == NULL) {
4018 interned = PyDict_New();
4019 if (interned == NULL)
4020 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004021 }
4022 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4023 Py_INCREF(t);
4024 *p = s->ob_sinterned = t;
4025 Py_DECREF(s);
4026 return;
4027 }
Tim Peters111f6092001-09-12 07:54:51 +00004028 /* Ensure that only true string objects appear in the intern dict,
4029 and as the value of ob_sinterned. */
4030 if (PyString_CheckExact(s)) {
4031 t = (PyObject *)s;
4032 if (PyDict_SetItem(interned, t, t) == 0) {
4033 s->ob_sinterned = t;
4034 return;
4035 }
4036 }
4037 else {
4038 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4039 PyString_GET_SIZE(s));
4040 if (t != NULL) {
4041 if (PyDict_SetItem(interned, t, t) == 0) {
4042 *p = s->ob_sinterned = t;
4043 Py_DECREF(s);
4044 return;
4045 }
4046 Py_DECREF(t);
4047 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004048 }
4049 PyErr_Clear();
4050}
4051
4052
4053PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004054PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004055{
4056 PyObject *s = PyString_FromString(cp);
4057 if (s == NULL)
4058 return NULL;
4059 PyString_InternInPlace(&s);
4060 return s;
4061}
4062
Guido van Rossum8cf04761997-08-02 02:57:45 +00004063void
Fred Drakeba096332000-07-09 07:04:36 +00004064PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004065{
4066 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004067 for (i = 0; i < UCHAR_MAX + 1; i++) {
4068 Py_XDECREF(characters[i]);
4069 characters[i] = NULL;
4070 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004071 Py_XDECREF(nullstring);
4072 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00004073 if (interned) {
4074 int pos, changed;
4075 PyObject *key, *value;
4076 do {
4077 changed = 0;
4078 pos = 0;
4079 while (PyDict_Next(interned, &pos, &key, &value)) {
4080 if (key->ob_refcnt == 2 && key == value) {
4081 PyDict_DelItem(interned, key);
4082 changed = 1;
4083 }
4084 }
4085 } while (changed);
4086 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004087}
Barry Warsawa903ad982001-02-23 16:40:48 +00004088
Barry Warsawa903ad982001-02-23 16:40:48 +00004089void _Py_ReleaseInternedStrings(void)
4090{
4091 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00004092 fprintf(stderr, "releasing interned strings\n");
4093 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00004094 Py_DECREF(interned);
4095 interned = NULL;
4096 }
4097}