blob: d8e6ff88bfec62431048542e68bda27ba0beccc8 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
/* Statistics kept only in COUNT_ALLOCS builds: how many times the shared
   empty string (null_strings) and a shared one-character string
   (one_strings) were handed out instead of allocating a new object. */
9int null_strings, one_strings;
10#endif
11
/* Fallback for platforms that provide neither <limits.h> nor UCHAR_MAX. */
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* Cache of one-character string objects, indexed by the character's byte
   value (`*str & UCHAR_MAX'); an entry is NULL until that string is first
   created. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
/* The shared empty string object; NULL until the first empty string is
   created. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes. For
   PyString_FromStringAndSize(), the string in the `str' parameter does not
   have to be null-terminated.  (Therefore it is safe to construct a substring
   by calling `PyString_FromStringAndSize(origstring, substrlen)'.)  If `str'
   is NULL then PyString_FromStringAndSize() will allocate `size+1' bytes
   (setting the last byte to the null terminating character) and you can fill in
   the data yourself.  If `str' is non-NULL then the resulting PyString object
   must be treated as immutable and you must not fill in nor alter the data
   yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra items"
   in a variable-size object, will contain the number of bytes allocated for
   string data, not counting the null terminating character.  It is therefore
   equal to the `size' parameter (for PyString_FromStringAndSize())
   or the length of the string in the `str' parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000046PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000047PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000048{
Tim Peters9e897f42001-05-09 07:37:07 +000049 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000050#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051 if (size == 0 && (op = nullstring) != NULL) {
52#ifdef COUNT_ALLOCS
53 null_strings++;
54#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 Py_INCREF(op);
56 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000058 if (size == 1 && str != NULL &&
59 (op = characters[*str & UCHAR_MAX]) != NULL)
60 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061#ifdef COUNT_ALLOCS
62 one_strings++;
63#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 Py_INCREF(op);
65 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000067#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000068
69 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 op = (PyStringObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +000071 _PyMalloc_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000072 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000074 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000076 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000077 if (str != NULL)
78 memcpy(op->ob_sval, str, size);
79 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000080#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000088 PyObject *t = (PyObject *)op;
89 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000090 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000093 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000094#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000096}
97
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000099PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000100{
Tim Peters62de65b2001-12-06 20:29:32 +0000101 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000102 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000103
104 assert(str != NULL);
105 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000106 if (size > INT_MAX) {
107 PyErr_SetString(PyExc_OverflowError,
108 "string is too long for a Python string");
109 return NULL;
110 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 if (size == 0 && (op = nullstring) != NULL) {
113#ifdef COUNT_ALLOCS
114 null_strings++;
115#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000116 Py_INCREF(op);
117 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000118 }
119 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
120#ifdef COUNT_ALLOCS
121 one_strings++;
122#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 Py_INCREF(op);
124 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
128 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 op = (PyStringObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +0000130 _PyMalloc_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000135 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000136 memcpy(op->ob_sval, str, size+1);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000137#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000151#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000152 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000153}
154
Barry Warsawdadace02001-08-24 18:32:06 +0000155PyObject *
156PyString_FromFormatV(const char *format, va_list vargs)
157{
Tim Petersc15c4f12001-10-02 21:32:07 +0000158 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 int n = 0;
160 const char* f;
161 char *s;
162 PyObject* string;
163
Tim Petersc15c4f12001-10-02 21:32:07 +0000164#ifdef VA_LIST_IS_ARRAY
165 memcpy(count, vargs, sizeof(va_list));
166#else
167 count = vargs;
168#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000169 /* step 1: figure out how large a buffer we need */
170 for (f = format; *f; f++) {
171 if (*f == '%') {
172 const char* p = f;
173 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
174 ;
175
176 /* skip the 'l' in %ld, since it doesn't change the
177 width. although only %d is supported (see
178 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000179 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000180 if (*f == 'l' && *(f+1) == 'd')
181 ++f;
182
183 switch (*f) {
184 case 'c':
185 (void)va_arg(count, int);
186 /* fall through... */
187 case '%':
188 n++;
189 break;
190 case 'd': case 'i': case 'x':
191 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000192 /* 20 bytes is enough to hold a 64-bit
193 integer. Decimal takes the most space.
194 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000195 n += 20;
196 break;
197 case 's':
198 s = va_arg(count, char*);
199 n += strlen(s);
200 break;
201 case 'p':
202 (void) va_arg(count, int);
203 /* maximum 64-bit pointer representation:
204 * 0xffffffffffffffff
205 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000206 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000207 */
208 n += 19;
209 break;
210 default:
211 /* if we stumble upon an unknown
212 formatting code, copy the rest of
213 the format string to the output
214 string. (we cannot just skip the
215 code, since there's no way to know
216 what's in the argument list) */
217 n += strlen(p);
218 goto expand;
219 }
220 } else
221 n++;
222 }
223 expand:
224 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000225 /* Since we've analyzed how much space we need for the worst case,
226 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000227 string = PyString_FromStringAndSize(NULL, n);
228 if (!string)
229 return NULL;
230
231 s = PyString_AsString(string);
232
233 for (f = format; *f; f++) {
234 if (*f == '%') {
235 const char* p = f++;
236 int i, longflag = 0;
237 /* parse the width.precision part (we're only
238 interested in the precision value, if any) */
239 n = 0;
240 while (isdigit(Py_CHARMASK(*f)))
241 n = (n*10) + *f++ - '0';
242 if (*f == '.') {
243 f++;
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 }
248 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
249 f++;
250 /* handle the long flag, but only for %ld. others
251 can be added when necessary. */
252 if (*f == 'l' && *(f+1) == 'd') {
253 longflag = 1;
254 ++f;
255 }
256
257 switch (*f) {
258 case 'c':
259 *s++ = va_arg(vargs, int);
260 break;
261 case 'd':
262 if (longflag)
263 sprintf(s, "%ld", va_arg(vargs, long));
264 else
265 sprintf(s, "%d", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 'i':
269 sprintf(s, "%i", va_arg(vargs, int));
270 s += strlen(s);
271 break;
272 case 'x':
273 sprintf(s, "%x", va_arg(vargs, int));
274 s += strlen(s);
275 break;
276 case 's':
277 p = va_arg(vargs, char*);
278 i = strlen(p);
279 if (n > 0 && i > n)
280 i = n;
281 memcpy(s, p, i);
282 s += i;
283 break;
284 case 'p':
285 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000286 /* %p is ill-defined: ensure leading 0x. */
287 if (s[1] == 'X')
288 s[1] = 'x';
289 else if (s[1] != 'x') {
290 memmove(s+2, s, strlen(s)+1);
291 s[0] = '0';
292 s[1] = 'x';
293 }
Barry Warsawdadace02001-08-24 18:32:06 +0000294 s += strlen(s);
295 break;
296 case '%':
297 *s++ = '%';
298 break;
299 default:
300 strcpy(s, p);
301 s += strlen(s);
302 goto end;
303 }
304 } else
305 *s++ = *f;
306 }
307
308 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000309 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000310 return string;
311}
312
313PyObject *
314PyString_FromFormat(const char *format, ...)
315{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000317 va_list vargs;
318
319#ifdef HAVE_STDARG_PROTOTYPES
320 va_start(vargs, format);
321#else
322 va_start(vargs);
323#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000324 ret = PyString_FromFormatV(format, vargs);
325 va_end(vargs);
326 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000327}
328
329
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000330PyObject *PyString_Decode(const char *s,
331 int size,
332 const char *encoding,
333 const char *errors)
334{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000335 PyObject *v, *str;
336
337 str = PyString_FromStringAndSize(s, size);
338 if (str == NULL)
339 return NULL;
340 v = PyString_AsDecodedString(str, encoding, errors);
341 Py_DECREF(str);
342 return v;
343}
344
345PyObject *PyString_AsDecodedObject(PyObject *str,
346 const char *encoding,
347 const char *errors)
348{
349 PyObject *v;
350
351 if (!PyString_Check(str)) {
352 PyErr_BadArgument();
353 goto onError;
354 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000355
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000356 if (encoding == NULL) {
357#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000359#else
360 PyErr_SetString(PyExc_ValueError, "no encoding specified");
361 goto onError;
362#endif
363 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364
365 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000366 v = PyCodec_Decode(str, encoding, errors);
367 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000368 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369
370 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000371
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000372 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 return NULL;
374}
375
376PyObject *PyString_AsDecodedString(PyObject *str,
377 const char *encoding,
378 const char *errors)
379{
380 PyObject *v;
381
382 v = PyString_AsDecodedObject(str, encoding, errors);
383 if (v == NULL)
384 goto onError;
385
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000386#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000387 /* Convert Unicode to a string using the default encoding */
388 if (PyUnicode_Check(v)) {
389 PyObject *temp = v;
390 v = PyUnicode_AsEncodedString(v, NULL, NULL);
391 Py_DECREF(temp);
392 if (v == NULL)
393 goto onError;
394 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000395#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000396 if (!PyString_Check(v)) {
397 PyErr_Format(PyExc_TypeError,
398 "decoder did not return a string object (type=%.400s)",
399 v->ob_type->tp_name);
400 Py_DECREF(v);
401 goto onError;
402 }
403
404 return v;
405
406 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000407 return NULL;
408}
409
410PyObject *PyString_Encode(const char *s,
411 int size,
412 const char *encoding,
413 const char *errors)
414{
415 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000416
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000417 str = PyString_FromStringAndSize(s, size);
418 if (str == NULL)
419 return NULL;
420 v = PyString_AsEncodedString(str, encoding, errors);
421 Py_DECREF(str);
422 return v;
423}
424
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000426 const char *encoding,
427 const char *errors)
428{
429 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000430
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000431 if (!PyString_Check(str)) {
432 PyErr_BadArgument();
433 goto onError;
434 }
435
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000436 if (encoding == NULL) {
437#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000439#else
440 PyErr_SetString(PyExc_ValueError, "no encoding specified");
441 goto onError;
442#endif
443 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000444
445 /* Encode via the codec registry */
446 v = PyCodec_Encode(str, encoding, errors);
447 if (v == NULL)
448 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000449
450 return v;
451
452 onError:
453 return NULL;
454}
455
456PyObject *PyString_AsEncodedString(PyObject *str,
457 const char *encoding,
458 const char *errors)
459{
460 PyObject *v;
461
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000462 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000463 if (v == NULL)
464 goto onError;
465
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000466#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000467 /* Convert Unicode to a string using the default encoding */
468 if (PyUnicode_Check(v)) {
469 PyObject *temp = v;
470 v = PyUnicode_AsEncodedString(v, NULL, NULL);
471 Py_DECREF(temp);
472 if (v == NULL)
473 goto onError;
474 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000475#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000476 if (!PyString_Check(v)) {
477 PyErr_Format(PyExc_TypeError,
478 "encoder did not return a string object (type=%.400s)",
479 v->ob_type->tp_name);
480 Py_DECREF(v);
481 goto onError;
482 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000483
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000484 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000485
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000486 onError:
487 return NULL;
488}
489
Guido van Rossum234f9421993-06-17 12:35:49 +0000490static void
Fred Drakeba096332000-07-09 07:04:36 +0000491string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000492{
Guido van Rossum9475a232001-10-05 20:51:39 +0000493 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000494}
495
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000496static int
497string_getsize(register PyObject *op)
498{
499 char *s;
500 int len;
501 if (PyString_AsStringAndSize(op, &s, &len))
502 return -1;
503 return len;
504}
505
506static /*const*/ char *
507string_getbuffer(register PyObject *op)
508{
509 char *s;
510 int len;
511 if (PyString_AsStringAndSize(op, &s, &len))
512 return NULL;
513 return s;
514}
515
Guido van Rossumd7047b31995-01-02 19:07:15 +0000516int
Fred Drakeba096332000-07-09 07:04:36 +0000517PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (!PyString_Check(op))
520 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522}
523
524/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000525PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000526{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000527 if (!PyString_Check(op))
528 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000529 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000530}
531
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000532int
533PyString_AsStringAndSize(register PyObject *obj,
534 register char **s,
535 register int *len)
536{
537 if (s == NULL) {
538 PyErr_BadInternalCall();
539 return -1;
540 }
541
542 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000543#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000544 if (PyUnicode_Check(obj)) {
545 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
546 if (obj == NULL)
547 return -1;
548 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000549 else
550#endif
551 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000552 PyErr_Format(PyExc_TypeError,
553 "expected string or Unicode object, "
554 "%.200s found", obj->ob_type->tp_name);
555 return -1;
556 }
557 }
558
559 *s = PyString_AS_STRING(obj);
560 if (len != NULL)
561 *len = PyString_GET_SIZE(obj);
562 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
563 PyErr_SetString(PyExc_TypeError,
564 "expected string without null bytes");
565 return -1;
566 }
567 return 0;
568}
569
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000570/* Methods */
571
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000572static int
Fred Drakeba096332000-07-09 07:04:36 +0000573string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000574{
575 int i;
576 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000577 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000578
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000579 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000580 if (! PyString_CheckExact(op)) {
581 int ret;
582 /* A str subclass may have its own __str__ method. */
583 op = (PyStringObject *) PyObject_Str((PyObject *)op);
584 if (op == NULL)
585 return -1;
586 ret = string_print(op, fp, flags);
587 Py_DECREF(op);
588 return ret;
589 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000590 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000591 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000594
Thomas Wouters7e474022000-07-16 12:04:32 +0000595 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000596 quote = '\'';
Martin v. Löwis1f803f72002-01-16 10:53:24 +0000597 if (memchr(op->ob_sval, '\'', op->ob_size) && !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000598 quote = '"';
599
600 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000601 for (i = 0; i < op->ob_size; i++) {
602 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000603 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000604 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000605 else if (c == '\t')
606 fprintf(fp, "\\t");
607 else if (c == '\n')
608 fprintf(fp, "\\n");
609 else if (c == '\r')
610 fprintf(fp, "\\r");
611 else if (c < ' ' || c >= 0x7f)
612 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000614 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000616 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000617 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000618}
619
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000620static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000621string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000622{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000623 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
624 PyObject *v;
625 if (newsize > INT_MAX) {
626 PyErr_SetString(PyExc_OverflowError,
627 "string is too large to make repr");
628 }
629 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000630 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000631 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000632 }
633 else {
634 register int i;
635 register char c;
636 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000637 int quote;
638
Thomas Wouters7e474022000-07-16 12:04:32 +0000639 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000640 quote = '\'';
Martin v. Löwis1f803f72002-01-16 10:53:24 +0000641 if (memchr(op->ob_sval, '\'', op->ob_size) && !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000642 quote = '"';
643
Tim Peters9161c8b2001-12-03 01:55:38 +0000644 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000645 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000647 /* There's at least enough room for a hex escape
648 and a closing quote. */
649 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000650 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000651 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000652 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000653 else if (c == '\t')
654 *p++ = '\\', *p++ = 't';
655 else if (c == '\n')
656 *p++ = '\\', *p++ = 'n';
657 else if (c == '\r')
658 *p++ = '\\', *p++ = 'r';
659 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000660 /* For performance, we don't want to call
661 PyOS_snprintf here (extra layers of
662 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000663 sprintf(p, "\\x%02x", c & 0xff);
664 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 }
666 else
667 *p++ = c;
668 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000669 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000670 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000671 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000672 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000673 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000674 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000676}
677
Guido van Rossum189f1df2001-05-01 16:51:53 +0000678static PyObject *
679string_str(PyObject *s)
680{
Tim Petersc9933152001-10-16 20:18:24 +0000681 assert(PyString_Check(s));
682 if (PyString_CheckExact(s)) {
683 Py_INCREF(s);
684 return s;
685 }
686 else {
687 /* Subtype -- return genuine string with the same value. */
688 PyStringObject *t = (PyStringObject *) s;
689 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
690 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000691}
692
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693static int
Fred Drakeba096332000-07-09 07:04:36 +0000694string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
696 return a->ob_size;
697}
698
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000699static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000700string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701{
702 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000703 register PyStringObject *op;
704 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000705#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000706 if (PyUnicode_Check(bb))
707 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000708#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000709 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000710 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000711 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712 return NULL;
713 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000714#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000716 if ((a->ob_size == 0 || b->ob_size == 0) &&
717 PyString_CheckExact(a) && PyString_CheckExact(b)) {
718 if (a->ob_size == 0) {
719 Py_INCREF(bb);
720 return bb;
721 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 Py_INCREF(a);
723 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724 }
725 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000726 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000727 op = (PyStringObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +0000728 _PyMalloc_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000729 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000731 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000732 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000733 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000734 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
735 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
736 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000737 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000738#undef b
739}
740
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000741static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000742string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000743{
744 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000745 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000746 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000747 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748 if (n < 0)
749 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000750 /* watch out for overflows: the size can overflow int,
751 * and the # of bytes needed can overflow size_t
752 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000753 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000754 if (n && size / n != a->ob_size) {
755 PyErr_SetString(PyExc_OverflowError,
756 "repeated string is too long");
757 return NULL;
758 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000759 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000760 Py_INCREF(a);
761 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000762 }
Tim Peters8f422462000-09-09 06:13:41 +0000763 nbytes = size * sizeof(char);
764 if (nbytes / sizeof(char) != (size_t)size ||
765 nbytes + sizeof(PyStringObject) <= nbytes) {
766 PyErr_SetString(PyExc_OverflowError,
767 "repeated string is too long");
768 return NULL;
769 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000770 op = (PyStringObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +0000771 _PyMalloc_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000772 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000773 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000774 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000775 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000776 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000777 for (i = 0; i < size; i += a->ob_size)
778 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
779 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000780 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000781}
782
783/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
784
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000785static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000786string_slice(register PyStringObject *a, register int i, register int j)
787 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788{
789 if (i < 0)
790 i = 0;
791 if (j < 0)
792 j = 0; /* Avoid signed/unsigned bug in next line */
793 if (j > a->ob_size)
794 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000795 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
796 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000797 Py_INCREF(a);
798 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799 }
800 if (j < i)
801 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000802 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000803}
804
Guido van Rossum9284a572000-03-07 15:53:43 +0000805static int
Fred Drakeba096332000-07-09 07:04:36 +0000806string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000807{
808 register char *s, *end;
809 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000810#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000811 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000812 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000813#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000814 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000815 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000816 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000817 return -1;
818 }
819 c = PyString_AsString(el)[0];
820 s = PyString_AsString(a);
821 end = s + PyString_Size(a);
822 while (s < end) {
823 if (c == *s++)
824 return 1;
825 }
826 return 0;
827}
828
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000829static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000830string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000832 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000833 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000835 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 return NULL;
837 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000838 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000839 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000840 if (v == NULL)
841 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000842 else {
843#ifdef COUNT_ALLOCS
844 one_strings++;
845#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000846 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000847 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000848 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849}
850
Martin v. Löwiscd353062001-05-24 16:56:35 +0000851static PyObject*
852string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000854 int c;
855 int len_a, len_b;
856 int min_len;
857 PyObject *result;
858
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000859 /* Make sure both arguments are strings. */
860 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000861 result = Py_NotImplemented;
862 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000863 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000864 if (a == b) {
865 switch (op) {
866 case Py_EQ:case Py_LE:case Py_GE:
867 result = Py_True;
868 goto out;
869 case Py_NE:case Py_LT:case Py_GT:
870 result = Py_False;
871 goto out;
872 }
873 }
874 if (op == Py_EQ) {
875 /* Supporting Py_NE here as well does not save
876 much time, since Py_NE is rarely used. */
877 if (a->ob_size == b->ob_size
878 && (a->ob_sval[0] == b->ob_sval[0]
879 && memcmp(a->ob_sval, b->ob_sval,
880 a->ob_size) == 0)) {
881 result = Py_True;
882 } else {
883 result = Py_False;
884 }
885 goto out;
886 }
887 len_a = a->ob_size; len_b = b->ob_size;
888 min_len = (len_a < len_b) ? len_a : len_b;
889 if (min_len > 0) {
890 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
891 if (c==0)
892 c = memcmp(a->ob_sval, b->ob_sval, min_len);
893 }else
894 c = 0;
895 if (c == 0)
896 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
897 switch (op) {
898 case Py_LT: c = c < 0; break;
899 case Py_LE: c = c <= 0; break;
900 case Py_EQ: assert(0); break; /* unreachable */
901 case Py_NE: c = c != 0; break;
902 case Py_GT: c = c > 0; break;
903 case Py_GE: c = c >= 0; break;
904 default:
905 result = Py_NotImplemented;
906 goto out;
907 }
908 result = c ? Py_True : Py_False;
909 out:
910 Py_INCREF(result);
911 return result;
912}
913
914int
915_PyString_Eq(PyObject *o1, PyObject *o2)
916{
917 PyStringObject *a, *b;
918 a = (PyStringObject*)o1;
919 b = (PyStringObject*)o2;
920 return a->ob_size == b->ob_size
921 && *a->ob_sval == *b->ob_sval
922 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923}
924
Guido van Rossum9bfef441993-03-29 10:43:31 +0000925static long
Fred Drakeba096332000-07-09 07:04:36 +0000926string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000927{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000928 register int len;
929 register unsigned char *p;
930 register long x;
931
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000932 if (a->ob_shash != -1)
933 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000934 if (a->ob_sinterned != NULL)
935 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000936 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000937 len = a->ob_size;
938 p = (unsigned char *) a->ob_sval;
939 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000940 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000941 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000942 x ^= a->ob_size;
943 if (x == -1)
944 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000945 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000946 return x;
947}
948
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000949static int
Fred Drakeba096332000-07-09 07:04:36 +0000950string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000951{
952 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000953 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000954 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000955 return -1;
956 }
957 *ptr = (void *)self->ob_sval;
958 return self->ob_size;
959}
960
961static int
Fred Drakeba096332000-07-09 07:04:36 +0000962string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000963{
Guido van Rossum045e6881997-09-08 18:30:11 +0000964 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000965 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000966 return -1;
967}
968
969static int
Fred Drakeba096332000-07-09 07:04:36 +0000970string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000971{
972 if ( lenp )
973 *lenp = self->ob_size;
974 return 1;
975}
976
Guido van Rossum1db70701998-10-08 02:18:52 +0000977static int
Fred Drakeba096332000-07-09 07:04:36 +0000978string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000979{
980 if ( index != 0 ) {
981 PyErr_SetString(PyExc_SystemError,
982 "accessing non-existent string segment");
983 return -1;
984 }
985 *ptr = self->ob_sval;
986 return self->ob_size;
987}
988
/* Sequence-protocol slot table for the string type.  The two assignment
   slots (sq_ass_item, sq_ass_slice) are left 0: no item or slice
   assignment function is provided. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000989static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000990 (inquiry)string_length, /*sq_length*/
991 (binaryfunc)string_concat, /*sq_concat*/
992 (intargfunc)string_repeat, /*sq_repeat*/
993 (intargfunc)string_item, /*sq_item*/
994 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000995 0, /*sq_ass_item*/
996 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000997 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998};
999
/* Buffer-interface slot table for the string type, in order: read buffer,
   write buffer (string_buffer_getwritebuf always fails with TypeError),
   segment count, and character buffer. */
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001000static PyBufferProcs string_as_buffer = {
1001 (getreadbufferproc)string_buffer_getreadbuf,
1002 (getwritebufferproc)string_buffer_getwritebuf,
1003 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001004 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001005};
1006
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001007
1008
/* Values for the `striptype' argument of do_strip(): which end(s) of the
   string have whitespace removed. */
1009#define LEFTSTRIP 0
1010#define RIGHTSTRIP 1
1011#define BOTHSTRIP 2
1012
1013
1014static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001015split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001016{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001017 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001018 PyObject* item;
1019 PyObject *list = PyList_New(0);
1020
1021 if (list == NULL)
1022 return NULL;
1023
Guido van Rossum4c08d552000-03-10 22:55:18 +00001024 for (i = j = 0; i < len; ) {
1025 while (i < len && isspace(Py_CHARMASK(s[i])))
1026 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001027 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001028 while (i < len && !isspace(Py_CHARMASK(s[i])))
1029 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001030 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001031 if (maxsplit-- <= 0)
1032 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001033 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1034 if (item == NULL)
1035 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001036 err = PyList_Append(list, item);
1037 Py_DECREF(item);
1038 if (err < 0)
1039 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001040 while (i < len && isspace(Py_CHARMASK(s[i])))
1041 i++;
1042 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001043 }
1044 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001045 if (j < len) {
1046 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1047 if (item == NULL)
1048 goto finally;
1049 err = PyList_Append(list, item);
1050 Py_DECREF(item);
1051 if (err < 0)
1052 goto finally;
1053 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054 return list;
1055 finally:
1056 Py_DECREF(list);
1057 return NULL;
1058}
1059
1060
1061static char split__doc__[] =
1062"S.split([sep [,maxsplit]]) -> list of strings\n\
1063\n\
1064Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001065delimiter string. If maxsplit is given, at most maxsplit\n\
1066splits are done. If sep is not specified, any whitespace string\n\
1067is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001068
1069static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001070string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001071{
1072 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001073 int maxsplit = -1;
1074 const char *s = PyString_AS_STRING(self), *sub;
1075 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001076
Guido van Rossum4c08d552000-03-10 22:55:18 +00001077 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001078 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001079 if (maxsplit < 0)
1080 maxsplit = INT_MAX;
1081 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001082 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001083 if (PyString_Check(subobj)) {
1084 sub = PyString_AS_STRING(subobj);
1085 n = PyString_GET_SIZE(subobj);
1086 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001087#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001088 else if (PyUnicode_Check(subobj))
1089 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001090#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001091 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1092 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001093 if (n == 0) {
1094 PyErr_SetString(PyExc_ValueError, "empty separator");
1095 return NULL;
1096 }
1097
1098 list = PyList_New(0);
1099 if (list == NULL)
1100 return NULL;
1101
1102 i = j = 0;
1103 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001104 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001105 if (maxsplit-- <= 0)
1106 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001107 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1108 if (item == NULL)
1109 goto fail;
1110 err = PyList_Append(list, item);
1111 Py_DECREF(item);
1112 if (err < 0)
1113 goto fail;
1114 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001115 }
1116 else
1117 i++;
1118 }
1119 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1120 if (item == NULL)
1121 goto fail;
1122 err = PyList_Append(list, item);
1123 Py_DECREF(item);
1124 if (err < 0)
1125 goto fail;
1126
1127 return list;
1128
1129 fail:
1130 Py_DECREF(list);
1131 return NULL;
1132}
1133
1134
1135static char join__doc__[] =
1136"S.join(sequence) -> string\n\
1137\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001138Return a string which is the concatenation of the strings in the\n\
1139sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140
1141static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001142string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001143{
1144 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001145 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001147 char *p;
1148 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001149 size_t sz = 0;
1150 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001151 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152
Tim Peters19fe14e2001-01-19 03:03:47 +00001153 seq = PySequence_Fast(orig, "");
1154 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001155 if (PyErr_ExceptionMatches(PyExc_TypeError))
1156 PyErr_Format(PyExc_TypeError,
1157 "sequence expected, %.80s found",
1158 orig->ob_type->tp_name);
1159 return NULL;
1160 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001161
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001162 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001163 if (seqlen == 0) {
1164 Py_DECREF(seq);
1165 return PyString_FromString("");
1166 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001167 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001168 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001169 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1170 PyErr_Format(PyExc_TypeError,
1171 "sequence item 0: expected string,"
1172 " %.80s found",
1173 item->ob_type->tp_name);
1174 Py_DECREF(seq);
1175 return NULL;
1176 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001177 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001178 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001179 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001181
Tim Peters19fe14e2001-01-19 03:03:47 +00001182 /* There are at least two things to join. Do a pre-pass to figure out
1183 * the total amount of space we'll need (sz), see whether any argument
1184 * is absurd, and defer to the Unicode join if appropriate.
1185 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001186 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001187 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001188 item = PySequence_Fast_GET_ITEM(seq, i);
1189 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001190#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001191 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001192 /* Defer to Unicode join.
1193 * CAUTION: There's no gurantee that the
1194 * original sequence can be iterated over
1195 * again, so we must pass seq here.
1196 */
1197 PyObject *result;
1198 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001199 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001200 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001201 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001202#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001203 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001204 "sequence item %i: expected string,"
1205 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001206 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001207 Py_DECREF(seq);
1208 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001209 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001210 sz += PyString_GET_SIZE(item);
1211 if (i != 0)
1212 sz += seplen;
1213 if (sz < old_sz || sz > INT_MAX) {
1214 PyErr_SetString(PyExc_OverflowError,
1215 "join() is too long for a Python string");
1216 Py_DECREF(seq);
1217 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001218 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001219 }
1220
1221 /* Allocate result space. */
1222 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1223 if (res == NULL) {
1224 Py_DECREF(seq);
1225 return NULL;
1226 }
1227
1228 /* Catenate everything. */
1229 p = PyString_AS_STRING(res);
1230 for (i = 0; i < seqlen; ++i) {
1231 size_t n;
1232 item = PySequence_Fast_GET_ITEM(seq, i);
1233 n = PyString_GET_SIZE(item);
1234 memcpy(p, PyString_AS_STRING(item), n);
1235 p += n;
1236 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001237 memcpy(p, sep, seplen);
1238 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001239 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001240 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001241
Jeremy Hylton49048292000-07-11 03:28:17 +00001242 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001243 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001244}
1245
Tim Peters52e155e2001-06-16 05:42:57 +00001246PyObject *
1247_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001248{
Tim Petersa7259592001-06-16 05:11:17 +00001249 assert(sep != NULL && PyString_Check(sep));
1250 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001251 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001252}
1253
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001254static long
Fred Drakeba096332000-07-09 07:04:36 +00001255string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001256{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001257 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001258 int len = PyString_GET_SIZE(self);
1259 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001260 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001261
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001262 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001263 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001264 return -2;
1265 if (PyString_Check(subobj)) {
1266 sub = PyString_AS_STRING(subobj);
1267 n = PyString_GET_SIZE(subobj);
1268 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001269#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001270 else if (PyUnicode_Check(subobj))
1271 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001272#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001273 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001274 return -2;
1275
1276 if (last > len)
1277 last = len;
1278 if (last < 0)
1279 last += len;
1280 if (last < 0)
1281 last = 0;
1282 if (i < 0)
1283 i += len;
1284 if (i < 0)
1285 i = 0;
1286
Guido van Rossum4c08d552000-03-10 22:55:18 +00001287 if (dir > 0) {
1288 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001290 last -= n;
1291 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001292 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001293 return (long)i;
1294 }
1295 else {
1296 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001297
Guido van Rossum4c08d552000-03-10 22:55:18 +00001298 if (n == 0 && i <= last)
1299 return (long)last;
1300 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001301 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001302 return (long)j;
1303 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001304
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305 return -1;
1306}
1307
1308
1309static char find__doc__[] =
1310"S.find(sub [,start [,end]]) -> int\n\
1311\n\
1312Return the lowest index in S where substring sub is found,\n\
1313such that sub is contained within s[start,end]. Optional\n\
1314arguments start and end are interpreted as in slice notation.\n\
1315\n\
1316Return -1 on failure.";
1317
1318static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001319string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001322 if (result == -2)
1323 return NULL;
1324 return PyInt_FromLong(result);
1325}
1326
1327
1328static char index__doc__[] =
1329"S.index(sub [,start [,end]]) -> int\n\
1330\n\
1331Like S.find() but raise ValueError when the substring is not found.";
1332
1333static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001334string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001336 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001337 if (result == -2)
1338 return NULL;
1339 if (result == -1) {
1340 PyErr_SetString(PyExc_ValueError,
1341 "substring not found in string.index");
1342 return NULL;
1343 }
1344 return PyInt_FromLong(result);
1345}
1346
1347
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348static char rfind__doc__[] =
1349"S.rfind(sub [,start [,end]]) -> int\n\
1350\n\
1351Return the highest index in S where substring sub is found,\n\
1352such that sub is contained within s[start,end]. Optional\n\
1353arguments start and end are interpreted as in slice notation.\n\
1354\n\
1355Return -1 on failure.";
1356
1357static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001358string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001359{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001360 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001361 if (result == -2)
1362 return NULL;
1363 return PyInt_FromLong(result);
1364}
1365
1366
1367static char rindex__doc__[] =
1368"S.rindex(sub [,start [,end]]) -> int\n\
1369\n\
1370Like S.rfind() but raise ValueError when the substring is not found.";
1371
1372static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001373string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001375 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376 if (result == -2)
1377 return NULL;
1378 if (result == -1) {
1379 PyErr_SetString(PyExc_ValueError,
1380 "substring not found in string.rindex");
1381 return NULL;
1382 }
1383 return PyInt_FromLong(result);
1384}
1385
1386
1387static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001388do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389{
1390 char *s = PyString_AS_STRING(self);
1391 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001392
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393 i = 0;
1394 if (striptype != RIGHTSTRIP) {
1395 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1396 i++;
1397 }
1398 }
1399
1400 j = len;
1401 if (striptype != LEFTSTRIP) {
1402 do {
1403 j--;
1404 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1405 j++;
1406 }
1407
Tim Peters8fa5dd02001-09-12 02:18:30 +00001408 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001409 Py_INCREF(self);
1410 return (PyObject*)self;
1411 }
1412 else
1413 return PyString_FromStringAndSize(s+i, j-i);
1414}
1415
1416
1417static char strip__doc__[] =
1418"S.strip() -> string\n\
1419\n\
1420Return a copy of the string S with leading and trailing\n\
1421whitespace removed.";
1422
1423static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001424string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001426 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001427}
1428
1429
1430static char lstrip__doc__[] =
1431"S.lstrip() -> string\n\
1432\n\
1433Return a copy of the string S with leading whitespace removed.";
1434
1435static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001436string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001438 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439}
1440
1441
1442static char rstrip__doc__[] =
1443"S.rstrip() -> string\n\
1444\n\
1445Return a copy of the string S with trailing whitespace removed.";
1446
1447static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001448string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001450 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001451}
1452
1453
1454static char lower__doc__[] =
1455"S.lower() -> string\n\
1456\n\
1457Return a copy of the string S converted to lowercase.";
1458
1459static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001460string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461{
1462 char *s = PyString_AS_STRING(self), *s_new;
1463 int i, n = PyString_GET_SIZE(self);
1464 PyObject *new;
1465
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001466 new = PyString_FromStringAndSize(NULL, n);
1467 if (new == NULL)
1468 return NULL;
1469 s_new = PyString_AsString(new);
1470 for (i = 0; i < n; i++) {
1471 int c = Py_CHARMASK(*s++);
1472 if (isupper(c)) {
1473 *s_new = tolower(c);
1474 } else
1475 *s_new = c;
1476 s_new++;
1477 }
1478 return new;
1479}
1480
1481
1482static char upper__doc__[] =
1483"S.upper() -> string\n\
1484\n\
1485Return a copy of the string S converted to uppercase.";
1486
1487static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001488string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489{
1490 char *s = PyString_AS_STRING(self), *s_new;
1491 int i, n = PyString_GET_SIZE(self);
1492 PyObject *new;
1493
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 new = PyString_FromStringAndSize(NULL, n);
1495 if (new == NULL)
1496 return NULL;
1497 s_new = PyString_AsString(new);
1498 for (i = 0; i < n; i++) {
1499 int c = Py_CHARMASK(*s++);
1500 if (islower(c)) {
1501 *s_new = toupper(c);
1502 } else
1503 *s_new = c;
1504 s_new++;
1505 }
1506 return new;
1507}
1508
1509
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510static char title__doc__[] =
1511"S.title() -> string\n\
1512\n\
1513Return a titlecased version of S, i.e. words start with uppercase\n\
1514characters, all remaining cased characters have lowercase.";
1515
1516static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001517string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001518{
1519 char *s = PyString_AS_STRING(self), *s_new;
1520 int i, n = PyString_GET_SIZE(self);
1521 int previous_is_cased = 0;
1522 PyObject *new;
1523
Guido van Rossum4c08d552000-03-10 22:55:18 +00001524 new = PyString_FromStringAndSize(NULL, n);
1525 if (new == NULL)
1526 return NULL;
1527 s_new = PyString_AsString(new);
1528 for (i = 0; i < n; i++) {
1529 int c = Py_CHARMASK(*s++);
1530 if (islower(c)) {
1531 if (!previous_is_cased)
1532 c = toupper(c);
1533 previous_is_cased = 1;
1534 } else if (isupper(c)) {
1535 if (previous_is_cased)
1536 c = tolower(c);
1537 previous_is_cased = 1;
1538 } else
1539 previous_is_cased = 0;
1540 *s_new++ = c;
1541 }
1542 return new;
1543}
1544
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545static char capitalize__doc__[] =
1546"S.capitalize() -> string\n\
1547\n\
1548Return a copy of the string S with only its first character\n\
1549capitalized.";
1550
1551static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001552string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553{
1554 char *s = PyString_AS_STRING(self), *s_new;
1555 int i, n = PyString_GET_SIZE(self);
1556 PyObject *new;
1557
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001558 new = PyString_FromStringAndSize(NULL, n);
1559 if (new == NULL)
1560 return NULL;
1561 s_new = PyString_AsString(new);
1562 if (0 < n) {
1563 int c = Py_CHARMASK(*s++);
1564 if (islower(c))
1565 *s_new = toupper(c);
1566 else
1567 *s_new = c;
1568 s_new++;
1569 }
1570 for (i = 1; i < n; i++) {
1571 int c = Py_CHARMASK(*s++);
1572 if (isupper(c))
1573 *s_new = tolower(c);
1574 else
1575 *s_new = c;
1576 s_new++;
1577 }
1578 return new;
1579}
1580
1581
1582static char count__doc__[] =
1583"S.count(sub[, start[, end]]) -> int\n\
1584\n\
1585Return the number of occurrences of substring sub in string\n\
1586S[start:end]. Optional arguments start and end are\n\
1587interpreted as in slice notation.";
1588
1589static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001590string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001592 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593 int len = PyString_GET_SIZE(self), n;
1594 int i = 0, last = INT_MAX;
1595 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001596 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597
Guido van Rossumc6821402000-05-08 14:08:05 +00001598 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1599 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001600 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001601
Guido van Rossum4c08d552000-03-10 22:55:18 +00001602 if (PyString_Check(subobj)) {
1603 sub = PyString_AS_STRING(subobj);
1604 n = PyString_GET_SIZE(subobj);
1605 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001606#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001607 else if (PyUnicode_Check(subobj)) {
1608 int count;
1609 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1610 if (count == -1)
1611 return NULL;
1612 else
1613 return PyInt_FromLong((long) count);
1614 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001615#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1617 return NULL;
1618
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619 if (last > len)
1620 last = len;
1621 if (last < 0)
1622 last += len;
1623 if (last < 0)
1624 last = 0;
1625 if (i < 0)
1626 i += len;
1627 if (i < 0)
1628 i = 0;
1629 m = last + 1 - n;
1630 if (n == 0)
1631 return PyInt_FromLong((long) (m-i));
1632
1633 r = 0;
1634 while (i < m) {
1635 if (!memcmp(s+i, sub, n)) {
1636 r++;
1637 i += n;
1638 } else {
1639 i++;
1640 }
1641 }
1642 return PyInt_FromLong((long) r);
1643}
1644
1645
1646static char swapcase__doc__[] =
1647"S.swapcase() -> string\n\
1648\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001649Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001650converted to lowercase and vice versa.";
1651
1652static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001653string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654{
1655 char *s = PyString_AS_STRING(self), *s_new;
1656 int i, n = PyString_GET_SIZE(self);
1657 PyObject *new;
1658
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001659 new = PyString_FromStringAndSize(NULL, n);
1660 if (new == NULL)
1661 return NULL;
1662 s_new = PyString_AsString(new);
1663 for (i = 0; i < n; i++) {
1664 int c = Py_CHARMASK(*s++);
1665 if (islower(c)) {
1666 *s_new = toupper(c);
1667 }
1668 else if (isupper(c)) {
1669 *s_new = tolower(c);
1670 }
1671 else
1672 *s_new = c;
1673 s_new++;
1674 }
1675 return new;
1676}
1677
1678
1679static char translate__doc__[] =
1680"S.translate(table [,deletechars]) -> string\n\
1681\n\
1682Return a copy of the string S, where all characters occurring\n\
1683in the optional argument deletechars are removed, and the\n\
1684remaining characters have been mapped through the given\n\
1685translation table, which must be a string of length 256.";
1686
1687static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001688string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001690 register char *input, *output;
1691 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692 register int i, c, changed = 0;
1693 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001694 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 int inlen, tablen, dellen = 0;
1696 PyObject *result;
1697 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699
Guido van Rossum4c08d552000-03-10 22:55:18 +00001700 if (!PyArg_ParseTuple(args, "O|O:translate",
1701 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703
1704 if (PyString_Check(tableobj)) {
1705 table1 = PyString_AS_STRING(tableobj);
1706 tablen = PyString_GET_SIZE(tableobj);
1707 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001708#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001709 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001710 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001711 parameter; instead a mapping to None will cause characters
1712 to be deleted. */
1713 if (delobj != NULL) {
1714 PyErr_SetString(PyExc_TypeError,
1715 "deletions are implemented differently for unicode");
1716 return NULL;
1717 }
1718 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1719 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001720#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001721 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001723
1724 if (delobj != NULL) {
1725 if (PyString_Check(delobj)) {
1726 del_table = PyString_AS_STRING(delobj);
1727 dellen = PyString_GET_SIZE(delobj);
1728 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001729#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001730 else if (PyUnicode_Check(delobj)) {
1731 PyErr_SetString(PyExc_TypeError,
1732 "deletions are implemented differently for unicode");
1733 return NULL;
1734 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001735#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001736 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1737 return NULL;
1738
1739 if (tablen != 256) {
1740 PyErr_SetString(PyExc_ValueError,
1741 "translation table must be 256 characters long");
1742 return NULL;
1743 }
1744 }
1745 else {
1746 del_table = NULL;
1747 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001748 }
1749
1750 table = table1;
1751 inlen = PyString_Size(input_obj);
1752 result = PyString_FromStringAndSize((char *)NULL, inlen);
1753 if (result == NULL)
1754 return NULL;
1755 output_start = output = PyString_AsString(result);
1756 input = PyString_AsString(input_obj);
1757
1758 if (dellen == 0) {
1759 /* If no deletions are required, use faster code */
1760 for (i = inlen; --i >= 0; ) {
1761 c = Py_CHARMASK(*input++);
1762 if (Py_CHARMASK((*output++ = table[c])) != c)
1763 changed = 1;
1764 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001765 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766 return result;
1767 Py_DECREF(result);
1768 Py_INCREF(input_obj);
1769 return input_obj;
1770 }
1771
1772 for (i = 0; i < 256; i++)
1773 trans_table[i] = Py_CHARMASK(table[i]);
1774
1775 for (i = 0; i < dellen; i++)
1776 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1777
1778 for (i = inlen; --i >= 0; ) {
1779 c = Py_CHARMASK(*input++);
1780 if (trans_table[c] != -1)
1781 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1782 continue;
1783 changed = 1;
1784 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001785 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786 Py_DECREF(result);
1787 Py_INCREF(input_obj);
1788 return input_obj;
1789 }
1790 /* Fix the size of the resulting string */
1791 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1792 return NULL;
1793 return result;
1794}
1795
1796
1797/* What follows is used for implementing replace(). Perry Stoll. */
1798
/*
  mymemfind

  Search the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT (a strstr() for arbitrary memory blocks).

  Returns the offset of the match inside MEM, or -1 when there is no
  match; in particular -1 is returned when PAT is longer than MEM.
  PAT[0] is read for every candidate position.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* a match cannot start within the last pat_len-1 bytes */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* compare the first byte before paying for memcmp() */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1824
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  the left with mymemfind():
     mem=1111  and pat=11 gives 2,
     mem=11111 and pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        int consumed;
        int where = mymemfind(mem, len, pat, pat_len);

        if (where == -1)
            break;
        /* resume right behind the match just found */
        consumed = where + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
1848
/*
   mymemreplace

   Return a string in which the first COUNT occurrences of PAT in
   memory STR are replaced with SUB.  A negative COUNT means "replace
   all occurrences".

   If STR is empty, if PAT is longer than STR, or if nothing is to be
   replaced, the original string is returned.  Otherwise, a new string
   is allocated here with PyMem_MALLOC() and returned; the caller has
   to release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs.

   return value is:
       the new string allocated locally, or
       NULL if an error occurred (no memory, or the length of the
       result does not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* The result only grows when SUB is longer than PAT.  Make sure
       the new length is representable before computing it: a wrapped
       value would yield a buffer far too small for the copies below. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1933
1934
1935static char replace__doc__[] =
1936"S.replace (old, new[, maxsplit]) -> string\n\
1937\n\
1938Return a copy of string S with all occurrences of substring\n\
1939old replaced by new. If the optional argument maxsplit is\n\
1940given, only the first maxsplit occurrences are replaced.";
1941
1942static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001943string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001945 const char *str = PyString_AS_STRING(self), *sub, *repl;
1946 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001947 const int len = PyString_GET_SIZE(self);
1948 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001949 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001951 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952
Guido van Rossum4c08d552000-03-10 22:55:18 +00001953 if (!PyArg_ParseTuple(args, "OO|i:replace",
1954 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001956
1957 if (PyString_Check(subobj)) {
1958 sub = PyString_AS_STRING(subobj);
1959 sub_len = PyString_GET_SIZE(subobj);
1960 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001961#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001962 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001963 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001964 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001965#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001966 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1967 return NULL;
1968
1969 if (PyString_Check(replobj)) {
1970 repl = PyString_AS_STRING(replobj);
1971 repl_len = PyString_GET_SIZE(replobj);
1972 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001973#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001974 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001975 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001976 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001977#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001978 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1979 return NULL;
1980
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001981 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001982 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983 return NULL;
1984 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001985 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986 if (new_s == NULL) {
1987 PyErr_NoMemory();
1988 return NULL;
1989 }
1990 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001991 if (PyString_CheckExact(self)) {
1992 /* we're returning another reference to self */
1993 new = (PyObject*)self;
1994 Py_INCREF(new);
1995 }
1996 else {
1997 new = PyString_FromStringAndSize(str, len);
1998 if (new == NULL)
1999 return NULL;
2000 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001 }
2002 else {
2003 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002004 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005 }
2006 return new;
2007}
2008
2009
2010static char startswith__doc__[] =
2011"S.startswith(prefix[, start[, end]]) -> int\n\
2012\n\
2013Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
2014optional start, test S beginning at that position. With optional end, stop\n\
2015comparing S at that position.";
2016
2017static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002018string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002020 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002022 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023 int plen;
2024 int start = 0;
2025 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002026 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027
Guido van Rossumc6821402000-05-08 14:08:05 +00002028 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2029 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002030 return NULL;
2031 if (PyString_Check(subobj)) {
2032 prefix = PyString_AS_STRING(subobj);
2033 plen = PyString_GET_SIZE(subobj);
2034 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002035#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002036 else if (PyUnicode_Check(subobj)) {
2037 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002038 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002039 subobj, start, end, -1);
2040 if (rc == -1)
2041 return NULL;
2042 else
2043 return PyInt_FromLong((long) rc);
2044 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002045#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002046 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047 return NULL;
2048
2049 /* adopt Java semantics for index out of range. it is legal for
2050 * offset to be == plen, but this only returns true if prefix is
2051 * the empty string.
2052 */
2053 if (start < 0 || start+plen > len)
2054 return PyInt_FromLong(0);
2055
2056 if (!memcmp(str+start, prefix, plen)) {
2057 /* did the match end after the specified end? */
2058 if (end < 0)
2059 return PyInt_FromLong(1);
2060 else if (end - start < plen)
2061 return PyInt_FromLong(0);
2062 else
2063 return PyInt_FromLong(1);
2064 }
2065 else return PyInt_FromLong(0);
2066}
2067
2068
2069static char endswith__doc__[] =
2070"S.endswith(suffix[, start[, end]]) -> int\n\
2071\n\
2072Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2073optional start, test S beginning at that position. With optional end, stop\n\
2074comparing S at that position.";
2075
2076static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002077string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002079 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002081 const char* suffix;
2082 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083 int start = 0;
2084 int end = -1;
2085 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002086 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087
Guido van Rossumc6821402000-05-08 14:08:05 +00002088 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2089 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002090 return NULL;
2091 if (PyString_Check(subobj)) {
2092 suffix = PyString_AS_STRING(subobj);
2093 slen = PyString_GET_SIZE(subobj);
2094 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002095#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002096 else if (PyUnicode_Check(subobj)) {
2097 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002098 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002099 subobj, start, end, +1);
2100 if (rc == -1)
2101 return NULL;
2102 else
2103 return PyInt_FromLong((long) rc);
2104 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002105#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107 return NULL;
2108
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 return PyInt_FromLong(0);
2111
2112 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114
Guido van Rossum4c08d552000-03-10 22:55:18 +00002115 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116 return PyInt_FromLong(1);
2117 else return PyInt_FromLong(0);
2118}
2119
2120
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002121static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002122"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002123\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002124Encodes S using the codec registered for encoding. encoding defaults\n\
2125to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002126handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2127a ValueError. Other possible values are 'ignore' and 'replace'.";
2128
2129static PyObject *
2130string_encode(PyStringObject *self, PyObject *args)
2131{
2132 char *encoding = NULL;
2133 char *errors = NULL;
2134 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2135 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002136 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2137}
2138
2139
2140static char decode__doc__[] =
2141"S.decode([encoding[,errors]]) -> object\n\
2142\n\
2143Decodes S using the codec registered for encoding. encoding defaults\n\
2144to the default encoding. errors may be given to set a different error\n\
2145handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2146a ValueError. Other possible values are 'ignore' and 'replace'.";
2147
2148static PyObject *
2149string_decode(PyStringObject *self, PyObject *args)
2150{
2151 char *encoding = NULL;
2152 char *errors = NULL;
2153 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2154 return NULL;
2155 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002156}
2157
2158
Guido van Rossum4c08d552000-03-10 22:55:18 +00002159static char expandtabs__doc__[] =
2160"S.expandtabs([tabsize]) -> string\n\
2161\n\
2162Return a copy of S where all tab characters are expanded using spaces.\n\
2163If tabsize is not given, a tab size of 8 characters is assumed.";
2164
2165static PyObject*
2166string_expandtabs(PyStringObject *self, PyObject *args)
2167{
2168 const char *e, *p;
2169 char *q;
2170 int i, j;
2171 PyObject *u;
2172 int tabsize = 8;
2173
2174 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2175 return NULL;
2176
Thomas Wouters7e474022000-07-16 12:04:32 +00002177 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002178 i = j = 0;
2179 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2180 for (p = PyString_AS_STRING(self); p < e; p++)
2181 if (*p == '\t') {
2182 if (tabsize > 0)
2183 j += tabsize - (j % tabsize);
2184 }
2185 else {
2186 j++;
2187 if (*p == '\n' || *p == '\r') {
2188 i += j;
2189 j = 0;
2190 }
2191 }
2192
2193 /* Second pass: create output string and fill it */
2194 u = PyString_FromStringAndSize(NULL, i + j);
2195 if (!u)
2196 return NULL;
2197
2198 j = 0;
2199 q = PyString_AS_STRING(u);
2200
2201 for (p = PyString_AS_STRING(self); p < e; p++)
2202 if (*p == '\t') {
2203 if (tabsize > 0) {
2204 i = tabsize - (j % tabsize);
2205 j += i;
2206 while (i--)
2207 *q++ = ' ';
2208 }
2209 }
2210 else {
2211 j++;
2212 *q++ = *p;
2213 if (*p == '\n' || *p == '\r')
2214 j = 0;
2215 }
2216
2217 return u;
2218}
2219
Tim Peters8fa5dd02001-09-12 02:18:30 +00002220static PyObject *
2221pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222{
2223 PyObject *u;
2224
2225 if (left < 0)
2226 left = 0;
2227 if (right < 0)
2228 right = 0;
2229
Tim Peters8fa5dd02001-09-12 02:18:30 +00002230 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002231 Py_INCREF(self);
2232 return (PyObject *)self;
2233 }
2234
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002235 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002236 left + PyString_GET_SIZE(self) + right);
2237 if (u) {
2238 if (left)
2239 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002240 memcpy(PyString_AS_STRING(u) + left,
2241 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002242 PyString_GET_SIZE(self));
2243 if (right)
2244 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2245 fill, right);
2246 }
2247
2248 return u;
2249}
2250
2251static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002252"S.ljust(width) -> string\n"
2253"\n"
2254"Return S left justified in a string of length width. Padding is\n"
2255"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256
2257static PyObject *
2258string_ljust(PyStringObject *self, PyObject *args)
2259{
2260 int width;
2261 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2262 return NULL;
2263
Tim Peters8fa5dd02001-09-12 02:18:30 +00002264 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265 Py_INCREF(self);
2266 return (PyObject*) self;
2267 }
2268
2269 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2270}
2271
2272
2273static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002274"S.rjust(width) -> string\n"
2275"\n"
2276"Return S right justified in a string of length width. Padding is\n"
2277"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278
2279static PyObject *
2280string_rjust(PyStringObject *self, PyObject *args)
2281{
2282 int width;
2283 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2284 return NULL;
2285
Tim Peters8fa5dd02001-09-12 02:18:30 +00002286 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 Py_INCREF(self);
2288 return (PyObject*) self;
2289 }
2290
2291 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2292}
2293
2294
2295static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002296"S.center(width) -> string\n"
2297"\n"
2298"Return S centered in a string of length width. Padding is done\n"
2299"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300
2301static PyObject *
2302string_center(PyStringObject *self, PyObject *args)
2303{
2304 int marg, left;
2305 int width;
2306
2307 if (!PyArg_ParseTuple(args, "i:center", &width))
2308 return NULL;
2309
Tim Peters8fa5dd02001-09-12 02:18:30 +00002310 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311 Py_INCREF(self);
2312 return (PyObject*) self;
2313 }
2314
2315 marg = width - PyString_GET_SIZE(self);
2316 left = marg / 2 + (marg & width & 1);
2317
2318 return pad(self, left, marg - left, ' ');
2319}
2320
Guido van Rossum4c08d552000-03-10 22:55:18 +00002321static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002322"S.isspace() -> int\n"
2323"\n"
2324"Return 1 if there are only whitespace characters in S,\n"
2325"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326
2327static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002328string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002329{
Fred Drakeba096332000-07-09 07:04:36 +00002330 register const unsigned char *p
2331 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002332 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333
Guido van Rossum4c08d552000-03-10 22:55:18 +00002334 /* Shortcut for single character strings */
2335 if (PyString_GET_SIZE(self) == 1 &&
2336 isspace(*p))
2337 return PyInt_FromLong(1);
2338
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002339 /* Special case for empty strings */
2340 if (PyString_GET_SIZE(self) == 0)
2341 return PyInt_FromLong(0);
2342
Guido van Rossum4c08d552000-03-10 22:55:18 +00002343 e = p + PyString_GET_SIZE(self);
2344 for (; p < e; p++) {
2345 if (!isspace(*p))
2346 return PyInt_FromLong(0);
2347 }
2348 return PyInt_FromLong(1);
2349}
2350
2351
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002352static char isalpha__doc__[] =
2353"S.isalpha() -> int\n\
2354\n\
2355Return 1 if all characters in S are alphabetic\n\
2356and there is at least one character in S, 0 otherwise.";
2357
2358static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002359string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002360{
Fred Drakeba096332000-07-09 07:04:36 +00002361 register const unsigned char *p
2362 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002363 register const unsigned char *e;
2364
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002365 /* Shortcut for single character strings */
2366 if (PyString_GET_SIZE(self) == 1 &&
2367 isalpha(*p))
2368 return PyInt_FromLong(1);
2369
2370 /* Special case for empty strings */
2371 if (PyString_GET_SIZE(self) == 0)
2372 return PyInt_FromLong(0);
2373
2374 e = p + PyString_GET_SIZE(self);
2375 for (; p < e; p++) {
2376 if (!isalpha(*p))
2377 return PyInt_FromLong(0);
2378 }
2379 return PyInt_FromLong(1);
2380}
2381
2382
2383static char isalnum__doc__[] =
2384"S.isalnum() -> int\n\
2385\n\
2386Return 1 if all characters in S are alphanumeric\n\
2387and there is at least one character in S, 0 otherwise.";
2388
2389static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002390string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002391{
Fred Drakeba096332000-07-09 07:04:36 +00002392 register const unsigned char *p
2393 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002394 register const unsigned char *e;
2395
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002396 /* Shortcut for single character strings */
2397 if (PyString_GET_SIZE(self) == 1 &&
2398 isalnum(*p))
2399 return PyInt_FromLong(1);
2400
2401 /* Special case for empty strings */
2402 if (PyString_GET_SIZE(self) == 0)
2403 return PyInt_FromLong(0);
2404
2405 e = p + PyString_GET_SIZE(self);
2406 for (; p < e; p++) {
2407 if (!isalnum(*p))
2408 return PyInt_FromLong(0);
2409 }
2410 return PyInt_FromLong(1);
2411}
2412
2413
Guido van Rossum4c08d552000-03-10 22:55:18 +00002414static char isdigit__doc__[] =
2415"S.isdigit() -> int\n\
2416\n\
2417Return 1 if there are only digit characters in S,\n\
24180 otherwise.";
2419
2420static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002421string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002422{
Fred Drakeba096332000-07-09 07:04:36 +00002423 register const unsigned char *p
2424 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002425 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002426
Guido van Rossum4c08d552000-03-10 22:55:18 +00002427 /* Shortcut for single character strings */
2428 if (PyString_GET_SIZE(self) == 1 &&
2429 isdigit(*p))
2430 return PyInt_FromLong(1);
2431
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002432 /* Special case for empty strings */
2433 if (PyString_GET_SIZE(self) == 0)
2434 return PyInt_FromLong(0);
2435
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 e = p + PyString_GET_SIZE(self);
2437 for (; p < e; p++) {
2438 if (!isdigit(*p))
2439 return PyInt_FromLong(0);
2440 }
2441 return PyInt_FromLong(1);
2442}
2443
2444
2445static char islower__doc__[] =
2446"S.islower() -> int\n\
2447\n\
2448Return 1 if all cased characters in S are lowercase and there is\n\
2449at least one cased character in S, 0 otherwise.";
2450
2451static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002452string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002453{
Fred Drakeba096332000-07-09 07:04:36 +00002454 register const unsigned char *p
2455 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002456 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002457 int cased;
2458
Guido van Rossum4c08d552000-03-10 22:55:18 +00002459 /* Shortcut for single character strings */
2460 if (PyString_GET_SIZE(self) == 1)
2461 return PyInt_FromLong(islower(*p) != 0);
2462
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002463 /* Special case for empty strings */
2464 if (PyString_GET_SIZE(self) == 0)
2465 return PyInt_FromLong(0);
2466
Guido van Rossum4c08d552000-03-10 22:55:18 +00002467 e = p + PyString_GET_SIZE(self);
2468 cased = 0;
2469 for (; p < e; p++) {
2470 if (isupper(*p))
2471 return PyInt_FromLong(0);
2472 else if (!cased && islower(*p))
2473 cased = 1;
2474 }
2475 return PyInt_FromLong(cased);
2476}
2477
2478
2479static char isupper__doc__[] =
2480"S.isupper() -> int\n\
2481\n\
2482Return 1 if all cased characters in S are uppercase and there is\n\
2483at least one cased character in S, 0 otherwise.";
2484
2485static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002486string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002487{
Fred Drakeba096332000-07-09 07:04:36 +00002488 register const unsigned char *p
2489 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002490 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002491 int cased;
2492
Guido van Rossum4c08d552000-03-10 22:55:18 +00002493 /* Shortcut for single character strings */
2494 if (PyString_GET_SIZE(self) == 1)
2495 return PyInt_FromLong(isupper(*p) != 0);
2496
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002497 /* Special case for empty strings */
2498 if (PyString_GET_SIZE(self) == 0)
2499 return PyInt_FromLong(0);
2500
Guido van Rossum4c08d552000-03-10 22:55:18 +00002501 e = p + PyString_GET_SIZE(self);
2502 cased = 0;
2503 for (; p < e; p++) {
2504 if (islower(*p))
2505 return PyInt_FromLong(0);
2506 else if (!cased && isupper(*p))
2507 cased = 1;
2508 }
2509 return PyInt_FromLong(cased);
2510}
2511
2512
2513static char istitle__doc__[] =
2514"S.istitle() -> int\n\
2515\n\
2516Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2517may only follow uncased characters and lowercase characters only cased\n\
2518ones. Return 0 otherwise.";
2519
2520static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002521string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522{
Fred Drakeba096332000-07-09 07:04:36 +00002523 register const unsigned char *p
2524 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002525 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 int cased, previous_is_cased;
2527
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 /* Shortcut for single character strings */
2529 if (PyString_GET_SIZE(self) == 1)
2530 return PyInt_FromLong(isupper(*p) != 0);
2531
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002532 /* Special case for empty strings */
2533 if (PyString_GET_SIZE(self) == 0)
2534 return PyInt_FromLong(0);
2535
Guido van Rossum4c08d552000-03-10 22:55:18 +00002536 e = p + PyString_GET_SIZE(self);
2537 cased = 0;
2538 previous_is_cased = 0;
2539 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002540 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002541
2542 if (isupper(ch)) {
2543 if (previous_is_cased)
2544 return PyInt_FromLong(0);
2545 previous_is_cased = 1;
2546 cased = 1;
2547 }
2548 else if (islower(ch)) {
2549 if (!previous_is_cased)
2550 return PyInt_FromLong(0);
2551 previous_is_cased = 1;
2552 cased = 1;
2553 }
2554 else
2555 previous_is_cased = 0;
2556 }
2557 return PyInt_FromLong(cased);
2558}
2559
2560
2561static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002562"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002563\n\
2564Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002565Line breaks are not included in the resulting list unless keepends\n\
2566is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567
2568#define SPLIT_APPEND(data, left, right) \
2569 str = PyString_FromStringAndSize(data + left, right - left); \
2570 if (!str) \
2571 goto onError; \
2572 if (PyList_Append(list, str)) { \
2573 Py_DECREF(str); \
2574 goto onError; \
2575 } \
2576 else \
2577 Py_DECREF(str);
2578
2579static PyObject*
2580string_splitlines(PyStringObject *self, PyObject *args)
2581{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582 register int i;
2583 register int j;
2584 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002585 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586 PyObject *list;
2587 PyObject *str;
2588 char *data;
2589
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002590 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002591 return NULL;
2592
2593 data = PyString_AS_STRING(self);
2594 len = PyString_GET_SIZE(self);
2595
Guido van Rossum4c08d552000-03-10 22:55:18 +00002596 list = PyList_New(0);
2597 if (!list)
2598 goto onError;
2599
2600 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002601 int eol;
2602
Guido van Rossum4c08d552000-03-10 22:55:18 +00002603 /* Find a line and append it */
2604 while (i < len && data[i] != '\n' && data[i] != '\r')
2605 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606
2607 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002608 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002609 if (i < len) {
2610 if (data[i] == '\r' && i + 1 < len &&
2611 data[i+1] == '\n')
2612 i += 2;
2613 else
2614 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002615 if (keepends)
2616 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002618 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002619 j = i;
2620 }
2621 if (j < len) {
2622 SPLIT_APPEND(data, j, len);
2623 }
2624
2625 return list;
2626
2627 onError:
2628 Py_DECREF(list);
2629 return NULL;
2630}
2631
2632#undef SPLIT_APPEND
2633
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002634
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002635static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002636string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002637 /* Counterparts of the obsolete stropmodule functions; except
2638 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002639 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2640 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2641 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2642 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2643 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2644 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2645 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2646 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2647 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2648 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2649 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2650 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2651 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2652 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2653 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2654 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2655 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2656 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2657 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2658 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2659 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2660 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2661 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2662 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2663 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2664 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2665 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2666 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2667 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2668 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2669 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2670 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2671 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002672#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002673 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002674#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002675 {NULL, NULL} /* sentinel */
2676};
2677
Guido van Rossumae960af2001-08-30 03:11:59 +00002678staticforward PyObject *
2679str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2680
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002681static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002682string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002683{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002684 PyObject *x = NULL;
2685 static char *kwlist[] = {"object", 0};
2686
Guido van Rossumae960af2001-08-30 03:11:59 +00002687 if (type != &PyString_Type)
2688 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002689 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2690 return NULL;
2691 if (x == NULL)
2692 return PyString_FromString("");
2693 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002694}
2695
Guido van Rossumae960af2001-08-30 03:11:59 +00002696static PyObject *
2697str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2698{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002699 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002700 int n;
2701
2702 assert(PyType_IsSubtype(type, &PyString_Type));
2703 tmp = string_new(&PyString_Type, args, kwds);
2704 if (tmp == NULL)
2705 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002706 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002707 n = PyString_GET_SIZE(tmp);
2708 pnew = type->tp_alloc(type, n);
2709 if (pnew != NULL) {
2710 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002711 ((PyStringObject *)pnew)->ob_shash =
2712 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002713 ((PyStringObject *)pnew)->ob_sinterned =
2714 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002715 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002716 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002717 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002718}
2719
Tim Peters6d6c1a32001-08-02 04:15:00 +00002720static char string_doc[] =
2721"str(object) -> string\n\
2722\n\
2723Return a nice string representation of the object.\n\
2724If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002725
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002726PyTypeObject PyString_Type = {
2727 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002728 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002729 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002730 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002731 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002732 (destructor)string_dealloc, /* tp_dealloc */
2733 (printfunc)string_print, /* tp_print */
2734 0, /* tp_getattr */
2735 0, /* tp_setattr */
2736 0, /* tp_compare */
2737 (reprfunc)string_repr, /* tp_repr */
2738 0, /* tp_as_number */
2739 &string_as_sequence, /* tp_as_sequence */
2740 0, /* tp_as_mapping */
2741 (hashfunc)string_hash, /* tp_hash */
2742 0, /* tp_call */
2743 (reprfunc)string_str, /* tp_str */
2744 PyObject_GenericGetAttr, /* tp_getattro */
2745 0, /* tp_setattro */
2746 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002747 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002748 string_doc, /* tp_doc */
2749 0, /* tp_traverse */
2750 0, /* tp_clear */
2751 (richcmpfunc)string_richcompare, /* tp_richcompare */
2752 0, /* tp_weaklistoffset */
2753 0, /* tp_iter */
2754 0, /* tp_iternext */
2755 string_methods, /* tp_methods */
2756 0, /* tp_members */
2757 0, /* tp_getset */
2758 0, /* tp_base */
2759 0, /* tp_dict */
2760 0, /* tp_descr_get */
2761 0, /* tp_descr_set */
2762 0, /* tp_dictoffset */
2763 0, /* tp_init */
2764 0, /* tp_alloc */
2765 string_new, /* tp_new */
Neil Schemenauerdcc819a2002-03-22 15:33:15 +00002766 _PyMalloc_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002767};
2768
2769void
Fred Drakeba096332000-07-09 07:04:36 +00002770PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002771{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002772 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002773 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002774 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002775 if (w == NULL || !PyString_Check(*pv)) {
2776 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002777 *pv = NULL;
2778 return;
2779 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002780 v = string_concat((PyStringObject *) *pv, w);
2781 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002782 *pv = v;
2783}
2784
Guido van Rossum013142a1994-08-30 08:19:36 +00002785void
Fred Drakeba096332000-07-09 07:04:36 +00002786PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002787{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002788 PyString_Concat(pv, w);
2789 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002790}
2791
2792
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002793/* The following function breaks the notion that strings are immutable:
2794 it changes the size of a string. We get away with this only if there
2795 is only one module referencing the object. You can also think of it
2796 as creating a new string object and destroying the old one, only
2797 more efficiently. In any case, don't use this if the string may
2798 already be known to some other part of the code... */
2799
2800int
Fred Drakeba096332000-07-09 07:04:36 +00002801_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002802{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002803 register PyObject *v;
2804 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002805 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002806 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002807 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002808 Py_DECREF(v);
2809 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002810 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002811 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002812 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002813#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002814 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002815#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002816 _Py_ForgetReference(v);
2817 *pv = (PyObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +00002818 _PyMalloc_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002819 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002820 if (*pv == NULL) {
Neil Schemenauerdcc819a2002-03-22 15:33:15 +00002821 PyMalloc_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002822 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002823 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002824 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002825 _Py_NewReference(*pv);
2826 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002827 sv->ob_size = newsize;
2828 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002829 return 0;
2830}
Guido van Rossume5372401993-03-16 12:15:04 +00002831
2832/* Helpers for formatstring */
2833
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002834static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002835getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002836{
2837 int argidx = *p_argidx;
2838 if (argidx < arglen) {
2839 (*p_argidx)++;
2840 if (arglen < 0)
2841 return args;
2842 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002843 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002844 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002845 PyErr_SetString(PyExc_TypeError,
2846 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002847 return NULL;
2848}
2849
/* Format flag bits, one per flag character of a conversion specifier. */
#define F_LJUST 0x01	/* '-' */
#define F_SIGN  0x02	/* '+' */
#define F_BLANK 0x04	/* ' ' */
#define F_ALT   0x08	/* '#' */
#define F_ZERO  0x10	/* '0' */
2862
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002863static int
Fred Drakeba096332000-07-09 07:04:36 +00002864formatfloat(char *buf, size_t buflen, int flags,
2865 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002866{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002867 /* fmt = '%#.' + `prec` + `type`
2868 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002869 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002870 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002871 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002872 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002873 if (prec < 0)
2874 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002875 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2876 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00002877 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
2878 (flags&F_ALT) ? "#" : "",
2879 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002880 /* worst case length calc to ensure no buffer overrun:
2881 fmt = %#.<prec>g
2882 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002883 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002884 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2885 If prec=0 the effective precision is 1 (the leading digit is
2886 always given), therefore increase by one to 10+prec. */
2887 if (buflen <= (size_t)10 + (size_t)prec) {
2888 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002889 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002890 return -1;
2891 }
Tim Peters885d4572001-11-28 20:27:42 +00002892 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002893 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002894}
2895
Tim Peters38fd5b62000-09-21 05:43:11 +00002896/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2897 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2898 * Python's regular ints.
2899 * Return value: a new PyString*, or NULL if error.
2900 * . *pbuf is set to point into it,
2901 * *plen set to the # of chars following that.
2902 * Caller must decref it when done using pbuf.
2903 * The string starting at *pbuf is of the form
2904 * "-"? ("0x" | "0X")? digit+
2905 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002906 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002907 * There will be at least prec digits, zero-filled on the left if
2908 * necessary to get that many.
2909 * val object to be converted
2910 * flags bitmask of format flags; only F_ALT is looked at
2911 * prec minimum number of digits; 0-fill on left if needed
2912 * type a character in [duoxX]; u acts the same as d
2913 *
2914 * CAUTION: o, x and X conversions on regular ints can never
2915 * produce a '-' sign, but can for Python's unbounded ints.
2916 */
2917PyObject*
2918_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2919 char **pbuf, int *plen)
2920{
2921 PyObject *result = NULL;
2922 char *buf;
2923 int i;
2924 int sign; /* 1 if '-', else 0 */
2925 int len; /* number of characters */
2926 int numdigits; /* len == numnondigits + numdigits */
2927 int numnondigits = 0;
2928
2929 switch (type) {
2930 case 'd':
2931 case 'u':
2932 result = val->ob_type->tp_str(val);
2933 break;
2934 case 'o':
2935 result = val->ob_type->tp_as_number->nb_oct(val);
2936 break;
2937 case 'x':
2938 case 'X':
2939 numnondigits = 2;
2940 result = val->ob_type->tp_as_number->nb_hex(val);
2941 break;
2942 default:
2943 assert(!"'type' not in [duoxX]");
2944 }
2945 if (!result)
2946 return NULL;
2947
2948 /* To modify the string in-place, there can only be one reference. */
2949 if (result->ob_refcnt != 1) {
2950 PyErr_BadInternalCall();
2951 return NULL;
2952 }
2953 buf = PyString_AsString(result);
2954 len = PyString_Size(result);
2955 if (buf[len-1] == 'L') {
2956 --len;
2957 buf[len] = '\0';
2958 }
2959 sign = buf[0] == '-';
2960 numnondigits += sign;
2961 numdigits = len - numnondigits;
2962 assert(numdigits > 0);
2963
Tim Petersfff53252001-04-12 18:38:48 +00002964 /* Get rid of base marker unless F_ALT */
2965 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002966 /* Need to skip 0x, 0X or 0. */
2967 int skipped = 0;
2968 switch (type) {
2969 case 'o':
2970 assert(buf[sign] == '0');
2971 /* If 0 is only digit, leave it alone. */
2972 if (numdigits > 1) {
2973 skipped = 1;
2974 --numdigits;
2975 }
2976 break;
2977 case 'x':
2978 case 'X':
2979 assert(buf[sign] == '0');
2980 assert(buf[sign + 1] == 'x');
2981 skipped = 2;
2982 numnondigits -= 2;
2983 break;
2984 }
2985 if (skipped) {
2986 buf += skipped;
2987 len -= skipped;
2988 if (sign)
2989 buf[0] = '-';
2990 }
2991 assert(len == numnondigits + numdigits);
2992 assert(numdigits > 0);
2993 }
2994
2995 /* Fill with leading zeroes to meet minimum width. */
2996 if (prec > numdigits) {
2997 PyObject *r1 = PyString_FromStringAndSize(NULL,
2998 numnondigits + prec);
2999 char *b1;
3000 if (!r1) {
3001 Py_DECREF(result);
3002 return NULL;
3003 }
3004 b1 = PyString_AS_STRING(r1);
3005 for (i = 0; i < numnondigits; ++i)
3006 *b1++ = *buf++;
3007 for (i = 0; i < prec - numdigits; i++)
3008 *b1++ = '0';
3009 for (i = 0; i < numdigits; i++)
3010 *b1++ = *buf++;
3011 *b1 = '\0';
3012 Py_DECREF(result);
3013 result = r1;
3014 buf = PyString_AS_STRING(result);
3015 len = numnondigits + prec;
3016 }
3017
3018 /* Fix up case for hex conversions. */
3019 switch (type) {
3020 case 'x':
3021 /* Need to convert all upper case letters to lower case. */
3022 for (i = 0; i < len; i++)
3023 if (buf[i] >= 'A' && buf[i] <= 'F')
3024 buf[i] += 'a'-'A';
3025 break;
3026 case 'X':
3027 /* Need to convert 0x to 0X (and -0x to -0X). */
3028 if (buf[sign + 1] == 'x')
3029 buf[sign + 1] = 'X';
3030 break;
3031 }
3032 *pbuf = buf;
3033 *plen = len;
3034 return result;
3035}
3036
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003037static int
Fred Drakeba096332000-07-09 07:04:36 +00003038formatint(char *buf, size_t buflen, int flags,
3039 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003040{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003041 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003042 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3043 + 1 + 1 = 24 */
3044 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003045 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003046
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003047 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003048 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003049 if (prec < 0)
3050 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003051
3052 if ((flags & F_ALT) &&
3053 (type == 'x' || type == 'X')) {
3054 /* When converting under %#x or %#X, there are a number
3055 * of issues that cause pain:
3056 * - when 0 is being converted, the C standard leaves off
3057 * the '0x' or '0X', which is inconsistent with other
3058 * %#x/%#X conversions and inconsistent with Python's
3059 * hex() function
3060 * - there are platforms that violate the standard and
3061 * convert 0 with the '0x' or '0X'
3062 * (Metrowerks, Compaq Tru64)
3063 * - there are platforms that give '0x' when converting
3064 * under %#X, but convert 0 in accordance with the
3065 * standard (OS/2 EMX)
3066 *
3067 * We can achieve the desired consistency by inserting our
3068 * own '0x' or '0X' prefix, and substituting %x/%X in place
3069 * of %#x/%#X.
3070 *
3071 * Note that this is the same approach as used in
3072 * formatint() in unicodeobject.c
3073 */
3074 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
3075 type, prec, type);
3076 }
3077 else {
3078 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3079 (flags&F_ALT) ? "#" : "",
3080 prec, type);
3081 }
3082
Tim Peters38fd5b62000-09-21 05:43:11 +00003083 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003084 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3085 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003086 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003087 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003088 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003089 return -1;
3090 }
Tim Peters885d4572001-11-28 20:27:42 +00003091 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003092 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003093}
3094
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003095static int
Fred Drakeba096332000-07-09 07:04:36 +00003096formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003097{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003098 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003099 if (PyString_Check(v)) {
3100 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003101 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003102 }
3103 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003104 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003105 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003106 }
3107 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003108 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003109}
3110
Guido van Rossum013142a1994-08-30 08:19:36 +00003111
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX This is a magic number.  Each formatting
   routine bounds-checks against it, so nothing overflows, but allocating
   a buffer of the appropriate size for each format might be a better
   solution.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003121
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003122PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003123PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003124{
3125 char *fmt, *res;
3126 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003127 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003128 PyObject *result, *orig_args;
3129#ifdef Py_USING_UNICODE
3130 PyObject *v, *w;
3131#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003132 PyObject *dict = NULL;
3133 if (format == NULL || !PyString_Check(format) || args == NULL) {
3134 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003135 return NULL;
3136 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003137 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003138 fmt = PyString_AS_STRING(format);
3139 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003140 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003141 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003142 if (result == NULL)
3143 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003144 res = PyString_AsString(result);
3145 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003146 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003147 argidx = 0;
3148 }
3149 else {
3150 arglen = -1;
3151 argidx = -2;
3152 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003153 if (args->ob_type->tp_as_mapping)
3154 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003155 while (--fmtcnt >= 0) {
3156 if (*fmt != '%') {
3157 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003158 rescnt = fmtcnt + 100;
3159 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003160 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003161 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003162 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003163 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003164 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003165 }
3166 *res++ = *fmt++;
3167 }
3168 else {
3169 /* Got a format specifier */
3170 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003171 int width = -1;
3172 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003173 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003174 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003175 PyObject *v = NULL;
3176 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003177 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003178 int sign;
3179 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003180 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003181#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003182 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003183 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003184#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003185
Guido van Rossumda9c2711996-12-05 21:58:58 +00003186 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003187 if (*fmt == '(') {
3188 char *keystart;
3189 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003190 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003191 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003192
3193 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003194 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003195 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003196 goto error;
3197 }
3198 ++fmt;
3199 --fmtcnt;
3200 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003201 /* Skip over balanced parentheses */
3202 while (pcount > 0 && --fmtcnt >= 0) {
3203 if (*fmt == ')')
3204 --pcount;
3205 else if (*fmt == '(')
3206 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003207 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003208 }
3209 keylen = fmt - keystart - 1;
3210 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003211 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003212 "incomplete format key");
3213 goto error;
3214 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003215 key = PyString_FromStringAndSize(keystart,
3216 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003217 if (key == NULL)
3218 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003219 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003220 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003221 args_owned = 0;
3222 }
3223 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003224 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003225 if (args == NULL) {
3226 goto error;
3227 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003228 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003229 arglen = -1;
3230 argidx = -2;
3231 }
Guido van Rossume5372401993-03-16 12:15:04 +00003232 while (--fmtcnt >= 0) {
3233 switch (c = *fmt++) {
3234 case '-': flags |= F_LJUST; continue;
3235 case '+': flags |= F_SIGN; continue;
3236 case ' ': flags |= F_BLANK; continue;
3237 case '#': flags |= F_ALT; continue;
3238 case '0': flags |= F_ZERO; continue;
3239 }
3240 break;
3241 }
3242 if (c == '*') {
3243 v = getnextarg(args, arglen, &argidx);
3244 if (v == NULL)
3245 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003246 if (!PyInt_Check(v)) {
3247 PyErr_SetString(PyExc_TypeError,
3248 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003249 goto error;
3250 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003251 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003252 if (width < 0) {
3253 flags |= F_LJUST;
3254 width = -width;
3255 }
Guido van Rossume5372401993-03-16 12:15:04 +00003256 if (--fmtcnt >= 0)
3257 c = *fmt++;
3258 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003259 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003260 width = c - '0';
3261 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003262 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003263 if (!isdigit(c))
3264 break;
3265 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003266 PyErr_SetString(
3267 PyExc_ValueError,
3268 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003269 goto error;
3270 }
3271 width = width*10 + (c - '0');
3272 }
3273 }
3274 if (c == '.') {
3275 prec = 0;
3276 if (--fmtcnt >= 0)
3277 c = *fmt++;
3278 if (c == '*') {
3279 v = getnextarg(args, arglen, &argidx);
3280 if (v == NULL)
3281 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003282 if (!PyInt_Check(v)) {
3283 PyErr_SetString(
3284 PyExc_TypeError,
3285 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003286 goto error;
3287 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003288 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003289 if (prec < 0)
3290 prec = 0;
3291 if (--fmtcnt >= 0)
3292 c = *fmt++;
3293 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003294 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003295 prec = c - '0';
3296 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003297 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003298 if (!isdigit(c))
3299 break;
3300 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003301 PyErr_SetString(
3302 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003303 "prec too big");
3304 goto error;
3305 }
3306 prec = prec*10 + (c - '0');
3307 }
3308 }
3309 } /* prec */
3310 if (fmtcnt >= 0) {
3311 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003312 if (--fmtcnt >= 0)
3313 c = *fmt++;
3314 }
3315 }
3316 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003317 PyErr_SetString(PyExc_ValueError,
3318 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003319 goto error;
3320 }
3321 if (c != '%') {
3322 v = getnextarg(args, arglen, &argidx);
3323 if (v == NULL)
3324 goto error;
3325 }
3326 sign = 0;
3327 fill = ' ';
3328 switch (c) {
3329 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003330 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003331 len = 1;
3332 break;
3333 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003334 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003335#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003336 if (PyUnicode_Check(v)) {
3337 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003338 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003339 goto unicode;
3340 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003341#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003342 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003343 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003344 else
3345 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003346 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003347 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003348 if (!PyString_Check(temp)) {
3349 PyErr_SetString(PyExc_TypeError,
3350 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003351 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003352 goto error;
3353 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003354 pbuf = PyString_AS_STRING(temp);
3355 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003356 if (prec >= 0 && len > prec)
3357 len = prec;
3358 break;
3359 case 'i':
3360 case 'd':
3361 case 'u':
3362 case 'o':
3363 case 'x':
3364 case 'X':
3365 if (c == 'i')
3366 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003367 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003368 temp = _PyString_FormatLong(v, flags,
3369 prec, c, &pbuf, &len);
3370 if (!temp)
3371 goto error;
3372 /* unbounded ints can always produce
3373 a sign character! */
3374 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003375 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003376 else {
3377 pbuf = formatbuf;
3378 len = formatint(pbuf, sizeof(formatbuf),
3379 flags, prec, c, v);
3380 if (len < 0)
3381 goto error;
3382 /* only d conversion is signed */
3383 sign = c == 'd';
3384 }
3385 if (flags & F_ZERO)
3386 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003387 break;
3388 case 'e':
3389 case 'E':
3390 case 'f':
3391 case 'g':
3392 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003393 pbuf = formatbuf;
3394 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003395 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003396 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003397 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003398 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003399 fill = '0';
3400 break;
3401 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003402 pbuf = formatbuf;
3403 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003404 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003405 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003406 break;
3407 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003408 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003409 "unsupported format character '%c' (0x%x) "
3410 "at index %i",
3411 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003412 goto error;
3413 }
3414 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003415 if (*pbuf == '-' || *pbuf == '+') {
3416 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003417 len--;
3418 }
3419 else if (flags & F_SIGN)
3420 sign = '+';
3421 else if (flags & F_BLANK)
3422 sign = ' ';
3423 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003424 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003425 }
3426 if (width < len)
3427 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003428 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003429 reslen -= rescnt;
3430 rescnt = width + fmtcnt + 100;
3431 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003432 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003433 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003434 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003435 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003436 }
3437 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003438 if (fill != ' ')
3439 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003440 rescnt--;
3441 if (width > len)
3442 width--;
3443 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003444 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3445 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003446 assert(pbuf[1] == c);
3447 if (fill != ' ') {
3448 *res++ = *pbuf++;
3449 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003450 }
Tim Petersfff53252001-04-12 18:38:48 +00003451 rescnt -= 2;
3452 width -= 2;
3453 if (width < 0)
3454 width = 0;
3455 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003456 }
3457 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003458 do {
3459 --rescnt;
3460 *res++ = fill;
3461 } while (--width > len);
3462 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003463 if (fill == ' ') {
3464 if (sign)
3465 *res++ = sign;
3466 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003467 (c == 'x' || c == 'X')) {
3468 assert(pbuf[0] == '0');
3469 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003470 *res++ = *pbuf++;
3471 *res++ = *pbuf++;
3472 }
3473 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003474 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003475 res += len;
3476 rescnt -= len;
3477 while (--width >= len) {
3478 --rescnt;
3479 *res++ = ' ';
3480 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003481 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003482 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003483 "not all arguments converted");
3484 goto error;
3485 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003486 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003487 } /* '%' */
3488 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003489 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003490 PyErr_SetString(PyExc_TypeError,
3491 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003492 goto error;
3493 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003494 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003495 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003496 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003497 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003498 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003499
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003500#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003501 unicode:
3502 if (args_owned) {
3503 Py_DECREF(args);
3504 args_owned = 0;
3505 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003506 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003507 if (PyTuple_Check(orig_args) && argidx > 0) {
3508 PyObject *v;
3509 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3510 v = PyTuple_New(n);
3511 if (v == NULL)
3512 goto error;
3513 while (--n >= 0) {
3514 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3515 Py_INCREF(w);
3516 PyTuple_SET_ITEM(v, n, w);
3517 }
3518 args = v;
3519 } else {
3520 Py_INCREF(orig_args);
3521 args = orig_args;
3522 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003523 args_owned = 1;
3524 /* Take what we have of the result and let the Unicode formatting
3525 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003526 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003527 if (_PyString_Resize(&result, rescnt))
3528 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003529 fmtcnt = PyString_GET_SIZE(format) - \
3530 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003531 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3532 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003533 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003534 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003535 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003536 if (v == NULL)
3537 goto error;
3538 /* Paste what we have (result) to what the Unicode formatting
3539 function returned (v) and return the result (or error) */
3540 w = PyUnicode_Concat(result, v);
3541 Py_DECREF(result);
3542 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003543 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003544 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003545#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003546
Guido van Rossume5372401993-03-16 12:15:04 +00003547 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003548 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003549 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003550 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003551 }
Guido van Rossume5372401993-03-16 12:15:04 +00003552 return NULL;
3553}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003554
3555
Guido van Rossum2a61e741997-01-18 07:55:05 +00003556
Barry Warsaw4df762f2000-08-16 23:41:01 +00003557/* This dictionary will leak at PyString_Fini() time. That's acceptable
3558 * because PyString_Fini() specifically frees interned strings that are
3559 * only referenced by this dictionary. The CVS log entry for revision 2.45
3560 * says:
3561 *
3562 * Change the Fini function to only remove otherwise unreferenced
3563 * strings from the interned table. There are references in
3564 * hard-to-find static variables all over the interpreter, and it's not
3565 * worth trying to get rid of all those; but "uninterning" isn't fair
3566 * either and may cause subtle failures later -- so we have to keep them
3567 * in the interned table.
3568 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003569static PyObject *interned;
3570
3571void
Fred Drakeba096332000-07-09 07:04:36 +00003572PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003573{
3574 register PyStringObject *s = (PyStringObject *)(*p);
3575 PyObject *t;
3576 if (s == NULL || !PyString_Check(s))
3577 Py_FatalError("PyString_InternInPlace: strings only please!");
3578 if ((t = s->ob_sinterned) != NULL) {
3579 if (t == (PyObject *)s)
3580 return;
3581 Py_INCREF(t);
3582 *p = t;
3583 Py_DECREF(s);
3584 return;
3585 }
3586 if (interned == NULL) {
3587 interned = PyDict_New();
3588 if (interned == NULL)
3589 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003590 }
3591 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3592 Py_INCREF(t);
3593 *p = s->ob_sinterned = t;
3594 Py_DECREF(s);
3595 return;
3596 }
Tim Peters111f6092001-09-12 07:54:51 +00003597 /* Ensure that only true string objects appear in the intern dict,
3598 and as the value of ob_sinterned. */
3599 if (PyString_CheckExact(s)) {
3600 t = (PyObject *)s;
3601 if (PyDict_SetItem(interned, t, t) == 0) {
3602 s->ob_sinterned = t;
3603 return;
3604 }
3605 }
3606 else {
3607 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3608 PyString_GET_SIZE(s));
3609 if (t != NULL) {
3610 if (PyDict_SetItem(interned, t, t) == 0) {
3611 *p = s->ob_sinterned = t;
3612 Py_DECREF(s);
3613 return;
3614 }
3615 Py_DECREF(t);
3616 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003617 }
3618 PyErr_Clear();
3619}
3620
3621
3622PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003623PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003624{
3625 PyObject *s = PyString_FromString(cp);
3626 if (s == NULL)
3627 return NULL;
3628 PyString_InternInPlace(&s);
3629 return s;
3630}
3631
Guido van Rossum8cf04761997-08-02 02:57:45 +00003632void
Fred Drakeba096332000-07-09 07:04:36 +00003633PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003634{
3635 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003636 for (i = 0; i < UCHAR_MAX + 1; i++) {
3637 Py_XDECREF(characters[i]);
3638 characters[i] = NULL;
3639 }
3640#ifndef DONT_SHARE_SHORT_STRINGS
3641 Py_XDECREF(nullstring);
3642 nullstring = NULL;
3643#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003644 if (interned) {
3645 int pos, changed;
3646 PyObject *key, *value;
3647 do {
3648 changed = 0;
3649 pos = 0;
3650 while (PyDict_Next(interned, &pos, &key, &value)) {
3651 if (key->ob_refcnt == 2 && key == value) {
3652 PyDict_DelItem(interned, key);
3653 changed = 1;
3654 }
3655 }
3656 } while (changed);
3657 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003658}
Barry Warsawa903ad982001-02-23 16:40:48 +00003659
Barry Warsawa903ad982001-02-23 16:40:48 +00003660void _Py_ReleaseInternedStrings(void)
3661{
3662 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003663 fprintf(stderr, "releasing interned strings\n");
3664 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003665 Py_DECREF(interned);
3666 interned = NULL;
3667 }
3668}