blob: 144c5b09c98bab9a08fe4d479231d669d0cd3d13 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000017static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000018
19/*
Martin v. Löwis1f803f72002-01-16 10:53:24 +000020 For both PyString_FromString() and PyString_FromStringAndSize(), the
21 parameter `size' denotes number of characters to allocate, not counting any
22 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000023
Martin v. Löwis1f803f72002-01-16 10:53:24 +000024 For PyString_FromString(), the parameter `str' points to a null-terminated
25 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000026
Martin v. Löwis1f803f72002-01-16 10:53:24 +000027 For PyString_FromStringAndSize(), the parameter the parameter `str' is
28 either NULL or else points to a string containing at least `size' bytes. For
29 PyString_FromStringAndSize(), the string in the `str' parameter does not
30 have to be null-terminated. (Therefore it is safe to construct a substring
31 by calling `PyString_FromStringAndSize(origstring, substrlen)'.) If `str'
32 is NULL then PyString_FromStringAndSize() will allocate `size+1' bytes
33 (setting the last byte to the null terminating character) and you can fill in
34 the data yourself. If `str' is non-NULL then the resulting PyString object
35 must be treated as immutable and you must not fill in nor alter the data
36 yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000037
Martin v. Löwis1f803f72002-01-16 10:53:24 +000038 The PyObject member `op->ob_size', which denotes the number of "extra items"
39 in a variable-size object, will contain the number of bytes allocated for
40 string data, not counting the null terminating character. It is therefore
41 equal to the equal to the `size' parameter (for PyString_FromStringAndSize())
42 or the length of the string in the `str' parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +000067 _PyMalloc_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +0000123 _PyMalloc_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
159 count = vargs;
160#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000161 /* step 1: figure out how large a buffer we need */
162 for (f = format; *f; f++) {
163 if (*f == '%') {
164 const char* p = f;
165 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
166 ;
167
168 /* skip the 'l' in %ld, since it doesn't change the
169 width. although only %d is supported (see
170 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000171 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000172 if (*f == 'l' && *(f+1) == 'd')
173 ++f;
174
175 switch (*f) {
176 case 'c':
177 (void)va_arg(count, int);
178 /* fall through... */
179 case '%':
180 n++;
181 break;
182 case 'd': case 'i': case 'x':
183 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000184 /* 20 bytes is enough to hold a 64-bit
185 integer. Decimal takes the most space.
186 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 n += 20;
188 break;
189 case 's':
190 s = va_arg(count, char*);
191 n += strlen(s);
192 break;
193 case 'p':
194 (void) va_arg(count, int);
195 /* maximum 64-bit pointer representation:
196 * 0xffffffffffffffff
197 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000198 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000199 */
200 n += 19;
201 break;
202 default:
203 /* if we stumble upon an unknown
204 formatting code, copy the rest of
205 the format string to the output
206 string. (we cannot just skip the
207 code, since there's no way to know
208 what's in the argument list) */
209 n += strlen(p);
210 goto expand;
211 }
212 } else
213 n++;
214 }
215 expand:
216 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000217 /* Since we've analyzed how much space we need for the worst case,
218 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 string = PyString_FromStringAndSize(NULL, n);
220 if (!string)
221 return NULL;
222
223 s = PyString_AsString(string);
224
225 for (f = format; *f; f++) {
226 if (*f == '%') {
227 const char* p = f++;
228 int i, longflag = 0;
229 /* parse the width.precision part (we're only
230 interested in the precision value, if any) */
231 n = 0;
232 while (isdigit(Py_CHARMASK(*f)))
233 n = (n*10) + *f++ - '0';
234 if (*f == '.') {
235 f++;
236 n = 0;
237 while (isdigit(Py_CHARMASK(*f)))
238 n = (n*10) + *f++ - '0';
239 }
240 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
241 f++;
242 /* handle the long flag, but only for %ld. others
243 can be added when necessary. */
244 if (*f == 'l' && *(f+1) == 'd') {
245 longflag = 1;
246 ++f;
247 }
248
249 switch (*f) {
250 case 'c':
251 *s++ = va_arg(vargs, int);
252 break;
253 case 'd':
254 if (longflag)
255 sprintf(s, "%ld", va_arg(vargs, long));
256 else
257 sprintf(s, "%d", va_arg(vargs, int));
258 s += strlen(s);
259 break;
260 case 'i':
261 sprintf(s, "%i", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'x':
265 sprintf(s, "%x", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 's':
269 p = va_arg(vargs, char*);
270 i = strlen(p);
271 if (n > 0 && i > n)
272 i = n;
273 memcpy(s, p, i);
274 s += i;
275 break;
276 case 'p':
277 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000278 /* %p is ill-defined: ensure leading 0x. */
279 if (s[1] == 'X')
280 s[1] = 'x';
281 else if (s[1] != 'x') {
282 memmove(s+2, s, strlen(s)+1);
283 s[0] = '0';
284 s[1] = 'x';
285 }
Barry Warsawdadace02001-08-24 18:32:06 +0000286 s += strlen(s);
287 break;
288 case '%':
289 *s++ = '%';
290 break;
291 default:
292 strcpy(s, p);
293 s += strlen(s);
294 goto end;
295 }
296 } else
297 *s++ = *f;
298 }
299
300 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000301 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000302 return string;
303}
304
305PyObject *
306PyString_FromFormat(const char *format, ...)
307{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000308 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000309 va_list vargs;
310
311#ifdef HAVE_STDARG_PROTOTYPES
312 va_start(vargs, format);
313#else
314 va_start(vargs);
315#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 ret = PyString_FromFormatV(format, vargs);
317 va_end(vargs);
318 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319}
320
321
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000322PyObject *PyString_Decode(const char *s,
323 int size,
324 const char *encoding,
325 const char *errors)
326{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000327 PyObject *v, *str;
328
329 str = PyString_FromStringAndSize(s, size);
330 if (str == NULL)
331 return NULL;
332 v = PyString_AsDecodedString(str, encoding, errors);
333 Py_DECREF(str);
334 return v;
335}
336
337PyObject *PyString_AsDecodedObject(PyObject *str,
338 const char *encoding,
339 const char *errors)
340{
341 PyObject *v;
342
343 if (!PyString_Check(str)) {
344 PyErr_BadArgument();
345 goto onError;
346 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000347
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000348 if (encoding == NULL) {
349#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000351#else
352 PyErr_SetString(PyExc_ValueError, "no encoding specified");
353 goto onError;
354#endif
355 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356
357 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000358 v = PyCodec_Decode(str, encoding, errors);
359 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361
362 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000363
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 return NULL;
366}
367
368PyObject *PyString_AsDecodedString(PyObject *str,
369 const char *encoding,
370 const char *errors)
371{
372 PyObject *v;
373
374 v = PyString_AsDecodedObject(str, encoding, errors);
375 if (v == NULL)
376 goto onError;
377
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000378#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000379 /* Convert Unicode to a string using the default encoding */
380 if (PyUnicode_Check(v)) {
381 PyObject *temp = v;
382 v = PyUnicode_AsEncodedString(v, NULL, NULL);
383 Py_DECREF(temp);
384 if (v == NULL)
385 goto onError;
386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 if (!PyString_Check(v)) {
389 PyErr_Format(PyExc_TypeError,
390 "decoder did not return a string object (type=%.400s)",
391 v->ob_type->tp_name);
392 Py_DECREF(v);
393 goto onError;
394 }
395
396 return v;
397
398 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 return NULL;
400}
401
402PyObject *PyString_Encode(const char *s,
403 int size,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 str = PyString_FromStringAndSize(s, size);
410 if (str == NULL)
411 return NULL;
412 v = PyString_AsEncodedString(str, encoding, errors);
413 Py_DECREF(str);
414 return v;
415}
416
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 const char *encoding,
419 const char *errors)
420{
421 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000422
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000423 if (!PyString_Check(str)) {
424 PyErr_BadArgument();
425 goto onError;
426 }
427
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000428 if (encoding == NULL) {
429#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000431#else
432 PyErr_SetString(PyExc_ValueError, "no encoding specified");
433 goto onError;
434#endif
435 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436
437 /* Encode via the codec registry */
438 v = PyCodec_Encode(str, encoding, errors);
439 if (v == NULL)
440 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000441
442 return v;
443
444 onError:
445 return NULL;
446}
447
448PyObject *PyString_AsEncodedString(PyObject *str,
449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v;
453
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000454 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000455 if (v == NULL)
456 goto onError;
457
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000458#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 /* Convert Unicode to a string using the default encoding */
460 if (PyUnicode_Check(v)) {
461 PyObject *temp = v;
462 v = PyUnicode_AsEncodedString(v, NULL, NULL);
463 Py_DECREF(temp);
464 if (v == NULL)
465 goto onError;
466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(v)) {
469 PyErr_Format(PyExc_TypeError,
470 "encoder did not return a string object (type=%.400s)",
471 v->ob_type->tp_name);
472 Py_DECREF(v);
473 goto onError;
474 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000476 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000477
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 onError:
479 return NULL;
480}
481
Guido van Rossum234f9421993-06-17 12:35:49 +0000482static void
Fred Drakeba096332000-07-09 07:04:36 +0000483string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000484{
Guido van Rossum9475a232001-10-05 20:51:39 +0000485 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000486}
487
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000488static int
489string_getsize(register PyObject *op)
490{
491 char *s;
492 int len;
493 if (PyString_AsStringAndSize(op, &s, &len))
494 return -1;
495 return len;
496}
497
498static /*const*/ char *
499string_getbuffer(register PyObject *op)
500{
501 char *s;
502 int len;
503 if (PyString_AsStringAndSize(op, &s, &len))
504 return NULL;
505 return s;
506}
507
Guido van Rossumd7047b31995-01-02 19:07:15 +0000508int
Fred Drakeba096332000-07-09 07:04:36 +0000509PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000511 if (!PyString_Check(op))
512 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
516/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000517PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (!PyString_Check(op))
520 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522}
523
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000524int
525PyString_AsStringAndSize(register PyObject *obj,
526 register char **s,
527 register int *len)
528{
529 if (s == NULL) {
530 PyErr_BadInternalCall();
531 return -1;
532 }
533
534 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000536 if (PyUnicode_Check(obj)) {
537 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
538 if (obj == NULL)
539 return -1;
540 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000541 else
542#endif
543 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000544 PyErr_Format(PyExc_TypeError,
545 "expected string or Unicode object, "
546 "%.200s found", obj->ob_type->tp_name);
547 return -1;
548 }
549 }
550
551 *s = PyString_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyString_GET_SIZE(obj);
554 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected string without null bytes");
557 return -1;
558 }
559 return 0;
560}
561
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562/* Methods */
563
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564static int
Fred Drakeba096332000-07-09 07:04:36 +0000565string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566{
567 int i;
568 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000570
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000571 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000572 if (! PyString_CheckExact(op)) {
573 int ret;
574 /* A str subclass may have its own __str__ method. */
575 op = (PyStringObject *) PyObject_Str((PyObject *)op);
576 if (op == NULL)
577 return -1;
578 ret = string_print(op, fp, flags);
579 Py_DECREF(op);
580 return ret;
581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000582 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000584 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000586
Thomas Wouters7e474022000-07-16 12:04:32 +0000587 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 quote = '\'';
Martin v. Löwis1f803f72002-01-16 10:53:24 +0000589 if (memchr(op->ob_sval, '\'', op->ob_size) && !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000590 quote = '"';
591
592 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 for (i = 0; i < op->ob_size; i++) {
594 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000595 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000596 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000597 else if (c == '\t')
598 fprintf(fp, "\\t");
599 else if (c == '\n')
600 fprintf(fp, "\\n");
601 else if (c == '\r')
602 fprintf(fp, "\\r");
603 else if (c < ' ' || c >= 0x7f)
604 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000606 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000608 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000609 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000610}
611
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000612static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000613string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000614{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000615 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
616 PyObject *v;
617 if (newsize > INT_MAX) {
618 PyErr_SetString(PyExc_OverflowError,
619 "string is too large to make repr");
620 }
621 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000622 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000623 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 }
625 else {
626 register int i;
627 register char c;
628 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000629 int quote;
630
Thomas Wouters7e474022000-07-16 12:04:32 +0000631 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000632 quote = '\'';
Martin v. Löwis1f803f72002-01-16 10:53:24 +0000633 if (memchr(op->ob_sval, '\'', op->ob_size) && !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000634 quote = '"';
635
Tim Peters9161c8b2001-12-03 01:55:38 +0000636 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000637 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000638 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000639 /* There's at least enough room for a hex escape
640 and a closing quote. */
641 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000643 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000645 else if (c == '\t')
646 *p++ = '\\', *p++ = 't';
647 else if (c == '\n')
648 *p++ = '\\', *p++ = 'n';
649 else if (c == '\r')
650 *p++ = '\\', *p++ = 'r';
651 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000652 /* For performance, we don't want to call
653 PyOS_snprintf here (extra layers of
654 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000655 sprintf(p, "\\x%02x", c & 0xff);
656 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000657 }
658 else
659 *p++ = c;
660 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000661 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000662 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000664 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000665 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000666 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000668}
669
Guido van Rossum189f1df2001-05-01 16:51:53 +0000670static PyObject *
671string_str(PyObject *s)
672{
Tim Petersc9933152001-10-16 20:18:24 +0000673 assert(PyString_Check(s));
674 if (PyString_CheckExact(s)) {
675 Py_INCREF(s);
676 return s;
677 }
678 else {
679 /* Subtype -- return genuine string with the same value. */
680 PyStringObject *t = (PyStringObject *) s;
681 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
682 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000683}
684
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000685static int
Fred Drakeba096332000-07-09 07:04:36 +0000686string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687{
688 return a->ob_size;
689}
690
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000691static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000692string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693{
694 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000695 register PyStringObject *op;
696 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000697#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000698 if (PyUnicode_Check(bb))
699 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000700#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000701 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000702 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000703 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704 return NULL;
705 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000708 if ((a->ob_size == 0 || b->ob_size == 0) &&
709 PyString_CheckExact(a) && PyString_CheckExact(b)) {
710 if (a->ob_size == 0) {
711 Py_INCREF(bb);
712 return bb;
713 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000714 Py_INCREF(a);
715 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716 }
717 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000718 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000719 op = (PyStringObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +0000720 _PyMalloc_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000721 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000723 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000724 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000725 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000726 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
727 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
728 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730#undef b
731}
732
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000733static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000734string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000735{
736 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000737 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000738 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000739 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000740 if (n < 0)
741 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000742 /* watch out for overflows: the size can overflow int,
743 * and the # of bytes needed can overflow size_t
744 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000746 if (n && size / n != a->ob_size) {
747 PyErr_SetString(PyExc_OverflowError,
748 "repeated string is too long");
749 return NULL;
750 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000751 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752 Py_INCREF(a);
753 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000754 }
Tim Peters8f422462000-09-09 06:13:41 +0000755 nbytes = size * sizeof(char);
756 if (nbytes / sizeof(char) != (size_t)size ||
757 nbytes + sizeof(PyStringObject) <= nbytes) {
758 PyErr_SetString(PyExc_OverflowError,
759 "repeated string is too long");
760 return NULL;
761 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000762 op = (PyStringObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +0000763 _PyMalloc_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000764 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000765 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000766 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000767 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000768 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000769 for (i = 0; i < size; i += a->ob_size)
770 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
771 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000772 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000773}
774
775/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
776
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000777static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000778string_slice(register PyStringObject *a, register int i, register int j)
779 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780{
781 if (i < 0)
782 i = 0;
783 if (j < 0)
784 j = 0; /* Avoid signed/unsigned bug in next line */
785 if (j > a->ob_size)
786 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000787 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
788 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000789 Py_INCREF(a);
790 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
792 if (j < i)
793 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000794 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000795}
796
Guido van Rossum9284a572000-03-07 15:53:43 +0000797static int
Fred Drakeba096332000-07-09 07:04:36 +0000798string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000799{
800 register char *s, *end;
801 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000802#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000803 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000804 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000805#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000806 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000807 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000808 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000809 return -1;
810 }
811 c = PyString_AsString(el)[0];
812 s = PyString_AsString(a);
813 end = s + PyString_Size(a);
814 while (s < end) {
815 if (c == *s++)
816 return 1;
817 }
818 return 0;
819}
820
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000821static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000822string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000824 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000825 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000827 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 return NULL;
829 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000830 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000831 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000832 if (v == NULL)
833 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000834 else {
835#ifdef COUNT_ALLOCS
836 one_strings++;
837#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000838 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000839 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000840 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841}
842
Martin v. Löwiscd353062001-05-24 16:56:35 +0000843static PyObject*
844string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000846 int c;
847 int len_a, len_b;
848 int min_len;
849 PyObject *result;
850
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000851 /* Make sure both arguments are strings. */
852 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000853 result = Py_NotImplemented;
854 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000855 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000856 if (a == b) {
857 switch (op) {
858 case Py_EQ:case Py_LE:case Py_GE:
859 result = Py_True;
860 goto out;
861 case Py_NE:case Py_LT:case Py_GT:
862 result = Py_False;
863 goto out;
864 }
865 }
866 if (op == Py_EQ) {
867 /* Supporting Py_NE here as well does not save
868 much time, since Py_NE is rarely used. */
869 if (a->ob_size == b->ob_size
870 && (a->ob_sval[0] == b->ob_sval[0]
871 && memcmp(a->ob_sval, b->ob_sval,
872 a->ob_size) == 0)) {
873 result = Py_True;
874 } else {
875 result = Py_False;
876 }
877 goto out;
878 }
879 len_a = a->ob_size; len_b = b->ob_size;
880 min_len = (len_a < len_b) ? len_a : len_b;
881 if (min_len > 0) {
882 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
883 if (c==0)
884 c = memcmp(a->ob_sval, b->ob_sval, min_len);
885 }else
886 c = 0;
887 if (c == 0)
888 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
889 switch (op) {
890 case Py_LT: c = c < 0; break;
891 case Py_LE: c = c <= 0; break;
892 case Py_EQ: assert(0); break; /* unreachable */
893 case Py_NE: c = c != 0; break;
894 case Py_GT: c = c > 0; break;
895 case Py_GE: c = c >= 0; break;
896 default:
897 result = Py_NotImplemented;
898 goto out;
899 }
900 result = c ? Py_True : Py_False;
901 out:
902 Py_INCREF(result);
903 return result;
904}
905
906int
907_PyString_Eq(PyObject *o1, PyObject *o2)
908{
909 PyStringObject *a, *b;
910 a = (PyStringObject*)o1;
911 b = (PyStringObject*)o2;
912 return a->ob_size == b->ob_size
913 && *a->ob_sval == *b->ob_sval
914 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915}
916
Guido van Rossum9bfef441993-03-29 10:43:31 +0000917static long
Fred Drakeba096332000-07-09 07:04:36 +0000918string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000920 register int len;
921 register unsigned char *p;
922 register long x;
923
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000924 if (a->ob_shash != -1)
925 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000926 if (a->ob_sinterned != NULL)
927 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000929 len = a->ob_size;
930 p = (unsigned char *) a->ob_sval;
931 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000932 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000933 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 x ^= a->ob_size;
935 if (x == -1)
936 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000937 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000938 return x;
939}
940
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000941static int
Fred Drakeba096332000-07-09 07:04:36 +0000942string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000943{
944 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000945 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000946 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000947 return -1;
948 }
949 *ptr = (void *)self->ob_sval;
950 return self->ob_size;
951}
952
953static int
Fred Drakeba096332000-07-09 07:04:36 +0000954string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000955{
Guido van Rossum045e6881997-09-08 18:30:11 +0000956 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000957 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000958 return -1;
959}
960
961static int
Fred Drakeba096332000-07-09 07:04:36 +0000962string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000963{
964 if ( lenp )
965 *lenp = self->ob_size;
966 return 1;
967}
968
Guido van Rossum1db70701998-10-08 02:18:52 +0000969static int
Fred Drakeba096332000-07-09 07:04:36 +0000970string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000971{
972 if ( index != 0 ) {
973 PyErr_SetString(PyExc_SystemError,
974 "accessing non-existent string segment");
975 return -1;
976 }
977 *ptr = self->ob_sval;
978 return self->ob_size;
979}
980
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000982 (inquiry)string_length, /*sq_length*/
983 (binaryfunc)string_concat, /*sq_concat*/
984 (intargfunc)string_repeat, /*sq_repeat*/
985 (intargfunc)string_item, /*sq_item*/
986 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000987 0, /*sq_ass_item*/
988 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000989 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990};
991
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000992static PyBufferProcs string_as_buffer = {
993 (getreadbufferproc)string_buffer_getreadbuf,
994 (getwritebufferproc)string_buffer_getwritebuf,
995 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000996 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000997};
998
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000999
1000
/* Values for the `striptype` argument of do_strip(). */
#define LEFTSTRIP 0	/* strip leading whitespace only */
#define RIGHTSTRIP 1	/* strip trailing whitespace only */
#define BOTHSTRIP 2	/* strip both ends */
1004
1005
1006static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001007split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001008{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001009 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001010 PyObject* item;
1011 PyObject *list = PyList_New(0);
1012
1013 if (list == NULL)
1014 return NULL;
1015
Guido van Rossum4c08d552000-03-10 22:55:18 +00001016 for (i = j = 0; i < len; ) {
1017 while (i < len && isspace(Py_CHARMASK(s[i])))
1018 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001019 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001020 while (i < len && !isspace(Py_CHARMASK(s[i])))
1021 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001022 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001023 if (maxsplit-- <= 0)
1024 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001025 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1026 if (item == NULL)
1027 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028 err = PyList_Append(list, item);
1029 Py_DECREF(item);
1030 if (err < 0)
1031 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001032 while (i < len && isspace(Py_CHARMASK(s[i])))
1033 i++;
1034 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001035 }
1036 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001037 if (j < len) {
1038 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1039 if (item == NULL)
1040 goto finally;
1041 err = PyList_Append(list, item);
1042 Py_DECREF(item);
1043 if (err < 0)
1044 goto finally;
1045 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001046 return list;
1047 finally:
1048 Py_DECREF(list);
1049 return NULL;
1050}
1051
1052
1053static char split__doc__[] =
1054"S.split([sep [,maxsplit]]) -> list of strings\n\
1055\n\
1056Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001057delimiter string. If maxsplit is given, at most maxsplit\n\
1058splits are done. If sep is not specified, any whitespace string\n\
1059is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001060
1061static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001062string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001063{
1064 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001065 int maxsplit = -1;
1066 const char *s = PyString_AS_STRING(self), *sub;
1067 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001068
Guido van Rossum4c08d552000-03-10 22:55:18 +00001069 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001070 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001071 if (maxsplit < 0)
1072 maxsplit = INT_MAX;
1073 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001074 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001075 if (PyString_Check(subobj)) {
1076 sub = PyString_AS_STRING(subobj);
1077 n = PyString_GET_SIZE(subobj);
1078 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001079#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001080 else if (PyUnicode_Check(subobj))
1081 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001082#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001083 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1084 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001085 if (n == 0) {
1086 PyErr_SetString(PyExc_ValueError, "empty separator");
1087 return NULL;
1088 }
1089
1090 list = PyList_New(0);
1091 if (list == NULL)
1092 return NULL;
1093
1094 i = j = 0;
1095 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001096 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001097 if (maxsplit-- <= 0)
1098 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001099 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1100 if (item == NULL)
1101 goto fail;
1102 err = PyList_Append(list, item);
1103 Py_DECREF(item);
1104 if (err < 0)
1105 goto fail;
1106 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001107 }
1108 else
1109 i++;
1110 }
1111 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1112 if (item == NULL)
1113 goto fail;
1114 err = PyList_Append(list, item);
1115 Py_DECREF(item);
1116 if (err < 0)
1117 goto fail;
1118
1119 return list;
1120
1121 fail:
1122 Py_DECREF(list);
1123 return NULL;
1124}
1125
1126
1127static char join__doc__[] =
1128"S.join(sequence) -> string\n\
1129\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001130Return a string which is the concatenation of the strings in the\n\
1131sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001132
1133static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001134string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001135{
1136 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001137 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001139 char *p;
1140 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001141 size_t sz = 0;
1142 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001143 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001144
Tim Peters19fe14e2001-01-19 03:03:47 +00001145 seq = PySequence_Fast(orig, "");
1146 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001147 if (PyErr_ExceptionMatches(PyExc_TypeError))
1148 PyErr_Format(PyExc_TypeError,
1149 "sequence expected, %.80s found",
1150 orig->ob_type->tp_name);
1151 return NULL;
1152 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001153
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001154 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001155 if (seqlen == 0) {
1156 Py_DECREF(seq);
1157 return PyString_FromString("");
1158 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001159 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001160 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001161 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1162 PyErr_Format(PyExc_TypeError,
1163 "sequence item 0: expected string,"
1164 " %.80s found",
1165 item->ob_type->tp_name);
1166 Py_DECREF(seq);
1167 return NULL;
1168 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001169 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001170 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001171 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001172 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001173
Tim Peters19fe14e2001-01-19 03:03:47 +00001174 /* There are at least two things to join. Do a pre-pass to figure out
1175 * the total amount of space we'll need (sz), see whether any argument
1176 * is absurd, and defer to the Unicode join if appropriate.
1177 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001178 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001179 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001180 item = PySequence_Fast_GET_ITEM(seq, i);
1181 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001182#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001183 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001184 /* Defer to Unicode join.
1185 * CAUTION: There's no gurantee that the
1186 * original sequence can be iterated over
1187 * again, so we must pass seq here.
1188 */
1189 PyObject *result;
1190 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001191 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001192 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001193 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001194#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001195 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001196 "sequence item %i: expected string,"
1197 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001198 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001199 Py_DECREF(seq);
1200 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001201 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001202 sz += PyString_GET_SIZE(item);
1203 if (i != 0)
1204 sz += seplen;
1205 if (sz < old_sz || sz > INT_MAX) {
1206 PyErr_SetString(PyExc_OverflowError,
1207 "join() is too long for a Python string");
1208 Py_DECREF(seq);
1209 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001210 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001211 }
1212
1213 /* Allocate result space. */
1214 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1215 if (res == NULL) {
1216 Py_DECREF(seq);
1217 return NULL;
1218 }
1219
1220 /* Catenate everything. */
1221 p = PyString_AS_STRING(res);
1222 for (i = 0; i < seqlen; ++i) {
1223 size_t n;
1224 item = PySequence_Fast_GET_ITEM(seq, i);
1225 n = PyString_GET_SIZE(item);
1226 memcpy(p, PyString_AS_STRING(item), n);
1227 p += n;
1228 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001229 memcpy(p, sep, seplen);
1230 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001231 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001232 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001233
Jeremy Hylton49048292000-07-11 03:28:17 +00001234 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001235 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001236}
1237
Tim Peters52e155e2001-06-16 05:42:57 +00001238PyObject *
1239_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001240{
Tim Petersa7259592001-06-16 05:11:17 +00001241 assert(sep != NULL && PyString_Check(sep));
1242 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001243 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001244}
1245
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001246static long
Fred Drakeba096332000-07-09 07:04:36 +00001247string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001248{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001249 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001250 int len = PyString_GET_SIZE(self);
1251 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001252 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001253
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001254 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001255 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001256 return -2;
1257 if (PyString_Check(subobj)) {
1258 sub = PyString_AS_STRING(subobj);
1259 n = PyString_GET_SIZE(subobj);
1260 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001261#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001262 else if (PyUnicode_Check(subobj))
1263 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001264#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001265 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001266 return -2;
1267
1268 if (last > len)
1269 last = len;
1270 if (last < 0)
1271 last += len;
1272 if (last < 0)
1273 last = 0;
1274 if (i < 0)
1275 i += len;
1276 if (i < 0)
1277 i = 0;
1278
Guido van Rossum4c08d552000-03-10 22:55:18 +00001279 if (dir > 0) {
1280 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001282 last -= n;
1283 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001284 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001285 return (long)i;
1286 }
1287 else {
1288 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001289
Guido van Rossum4c08d552000-03-10 22:55:18 +00001290 if (n == 0 && i <= last)
1291 return (long)last;
1292 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001293 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001294 return (long)j;
1295 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001296
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001297 return -1;
1298}
1299
1300
1301static char find__doc__[] =
1302"S.find(sub [,start [,end]]) -> int\n\
1303\n\
1304Return the lowest index in S where substring sub is found,\n\
1305such that sub is contained within s[start,end]. Optional\n\
1306arguments start and end are interpreted as in slice notation.\n\
1307\n\
1308Return -1 on failure.";
1309
1310static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001311string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001312{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001313 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001314 if (result == -2)
1315 return NULL;
1316 return PyInt_FromLong(result);
1317}
1318
1319
1320static char index__doc__[] =
1321"S.index(sub [,start [,end]]) -> int\n\
1322\n\
1323Like S.find() but raise ValueError when the substring is not found.";
1324
1325static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001326string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001328 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001329 if (result == -2)
1330 return NULL;
1331 if (result == -1) {
1332 PyErr_SetString(PyExc_ValueError,
1333 "substring not found in string.index");
1334 return NULL;
1335 }
1336 return PyInt_FromLong(result);
1337}
1338
1339
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340static char rfind__doc__[] =
1341"S.rfind(sub [,start [,end]]) -> int\n\
1342\n\
1343Return the highest index in S where substring sub is found,\n\
1344such that sub is contained within s[start,end]. Optional\n\
1345arguments start and end are interpreted as in slice notation.\n\
1346\n\
1347Return -1 on failure.";
1348
1349static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001350string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001351{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001352 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001353 if (result == -2)
1354 return NULL;
1355 return PyInt_FromLong(result);
1356}
1357
1358
1359static char rindex__doc__[] =
1360"S.rindex(sub [,start [,end]]) -> int\n\
1361\n\
1362Like S.rfind() but raise ValueError when the substring is not found.";
1363
1364static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001365string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001366{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001368 if (result == -2)
1369 return NULL;
1370 if (result == -1) {
1371 PyErr_SetString(PyExc_ValueError,
1372 "substring not found in string.rindex");
1373 return NULL;
1374 }
1375 return PyInt_FromLong(result);
1376}
1377
1378
1379static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001380do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001381{
1382 char *s = PyString_AS_STRING(self);
1383 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001384
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001385 i = 0;
1386 if (striptype != RIGHTSTRIP) {
1387 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1388 i++;
1389 }
1390 }
1391
1392 j = len;
1393 if (striptype != LEFTSTRIP) {
1394 do {
1395 j--;
1396 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1397 j++;
1398 }
1399
Tim Peters8fa5dd02001-09-12 02:18:30 +00001400 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 Py_INCREF(self);
1402 return (PyObject*)self;
1403 }
1404 else
1405 return PyString_FromStringAndSize(s+i, j-i);
1406}
1407
1408
1409static char strip__doc__[] =
1410"S.strip() -> string\n\
1411\n\
1412Return a copy of the string S with leading and trailing\n\
1413whitespace removed.";
1414
1415static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001416string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001418 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419}
1420
1421
1422static char lstrip__doc__[] =
1423"S.lstrip() -> string\n\
1424\n\
1425Return a copy of the string S with leading whitespace removed.";
1426
1427static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001428string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001429{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001430 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431}
1432
1433
1434static char rstrip__doc__[] =
1435"S.rstrip() -> string\n\
1436\n\
1437Return a copy of the string S with trailing whitespace removed.";
1438
1439static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001440string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001442 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443}
1444
1445
1446static char lower__doc__[] =
1447"S.lower() -> string\n\
1448\n\
1449Return a copy of the string S converted to lowercase.";
1450
1451static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001452string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001453{
1454 char *s = PyString_AS_STRING(self), *s_new;
1455 int i, n = PyString_GET_SIZE(self);
1456 PyObject *new;
1457
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458 new = PyString_FromStringAndSize(NULL, n);
1459 if (new == NULL)
1460 return NULL;
1461 s_new = PyString_AsString(new);
1462 for (i = 0; i < n; i++) {
1463 int c = Py_CHARMASK(*s++);
1464 if (isupper(c)) {
1465 *s_new = tolower(c);
1466 } else
1467 *s_new = c;
1468 s_new++;
1469 }
1470 return new;
1471}
1472
1473
1474static char upper__doc__[] =
1475"S.upper() -> string\n\
1476\n\
1477Return a copy of the string S converted to uppercase.";
1478
1479static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001480string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481{
1482 char *s = PyString_AS_STRING(self), *s_new;
1483 int i, n = PyString_GET_SIZE(self);
1484 PyObject *new;
1485
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001486 new = PyString_FromStringAndSize(NULL, n);
1487 if (new == NULL)
1488 return NULL;
1489 s_new = PyString_AsString(new);
1490 for (i = 0; i < n; i++) {
1491 int c = Py_CHARMASK(*s++);
1492 if (islower(c)) {
1493 *s_new = toupper(c);
1494 } else
1495 *s_new = c;
1496 s_new++;
1497 }
1498 return new;
1499}
1500
1501
Guido van Rossum4c08d552000-03-10 22:55:18 +00001502static char title__doc__[] =
1503"S.title() -> string\n\
1504\n\
1505Return a titlecased version of S, i.e. words start with uppercase\n\
1506characters, all remaining cased characters have lowercase.";
1507
1508static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001509string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510{
1511 char *s = PyString_AS_STRING(self), *s_new;
1512 int i, n = PyString_GET_SIZE(self);
1513 int previous_is_cased = 0;
1514 PyObject *new;
1515
Guido van Rossum4c08d552000-03-10 22:55:18 +00001516 new = PyString_FromStringAndSize(NULL, n);
1517 if (new == NULL)
1518 return NULL;
1519 s_new = PyString_AsString(new);
1520 for (i = 0; i < n; i++) {
1521 int c = Py_CHARMASK(*s++);
1522 if (islower(c)) {
1523 if (!previous_is_cased)
1524 c = toupper(c);
1525 previous_is_cased = 1;
1526 } else if (isupper(c)) {
1527 if (previous_is_cased)
1528 c = tolower(c);
1529 previous_is_cased = 1;
1530 } else
1531 previous_is_cased = 0;
1532 *s_new++ = c;
1533 }
1534 return new;
1535}
1536
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001537static char capitalize__doc__[] =
1538"S.capitalize() -> string\n\
1539\n\
1540Return a copy of the string S with only its first character\n\
1541capitalized.";
1542
1543static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001544string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545{
1546 char *s = PyString_AS_STRING(self), *s_new;
1547 int i, n = PyString_GET_SIZE(self);
1548 PyObject *new;
1549
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001550 new = PyString_FromStringAndSize(NULL, n);
1551 if (new == NULL)
1552 return NULL;
1553 s_new = PyString_AsString(new);
1554 if (0 < n) {
1555 int c = Py_CHARMASK(*s++);
1556 if (islower(c))
1557 *s_new = toupper(c);
1558 else
1559 *s_new = c;
1560 s_new++;
1561 }
1562 for (i = 1; i < n; i++) {
1563 int c = Py_CHARMASK(*s++);
1564 if (isupper(c))
1565 *s_new = tolower(c);
1566 else
1567 *s_new = c;
1568 s_new++;
1569 }
1570 return new;
1571}
1572
1573
1574static char count__doc__[] =
1575"S.count(sub[, start[, end]]) -> int\n\
1576\n\
1577Return the number of occurrences of substring sub in string\n\
1578S[start:end]. Optional arguments start and end are\n\
1579interpreted as in slice notation.";
1580
1581static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001582string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001583{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001584 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001585 int len = PyString_GET_SIZE(self), n;
1586 int i = 0, last = INT_MAX;
1587 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001588 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001589
Guido van Rossumc6821402000-05-08 14:08:05 +00001590 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1591 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001593
Guido van Rossum4c08d552000-03-10 22:55:18 +00001594 if (PyString_Check(subobj)) {
1595 sub = PyString_AS_STRING(subobj);
1596 n = PyString_GET_SIZE(subobj);
1597 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001598#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001599 else if (PyUnicode_Check(subobj)) {
1600 int count;
1601 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1602 if (count == -1)
1603 return NULL;
1604 else
1605 return PyInt_FromLong((long) count);
1606 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001607#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001608 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1609 return NULL;
1610
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611 if (last > len)
1612 last = len;
1613 if (last < 0)
1614 last += len;
1615 if (last < 0)
1616 last = 0;
1617 if (i < 0)
1618 i += len;
1619 if (i < 0)
1620 i = 0;
1621 m = last + 1 - n;
1622 if (n == 0)
1623 return PyInt_FromLong((long) (m-i));
1624
1625 r = 0;
1626 while (i < m) {
1627 if (!memcmp(s+i, sub, n)) {
1628 r++;
1629 i += n;
1630 } else {
1631 i++;
1632 }
1633 }
1634 return PyInt_FromLong((long) r);
1635}
1636
1637
1638static char swapcase__doc__[] =
1639"S.swapcase() -> string\n\
1640\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001641Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642converted to lowercase and vice versa.";
1643
1644static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001645string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646{
1647 char *s = PyString_AS_STRING(self), *s_new;
1648 int i, n = PyString_GET_SIZE(self);
1649 PyObject *new;
1650
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651 new = PyString_FromStringAndSize(NULL, n);
1652 if (new == NULL)
1653 return NULL;
1654 s_new = PyString_AsString(new);
1655 for (i = 0; i < n; i++) {
1656 int c = Py_CHARMASK(*s++);
1657 if (islower(c)) {
1658 *s_new = toupper(c);
1659 }
1660 else if (isupper(c)) {
1661 *s_new = tolower(c);
1662 }
1663 else
1664 *s_new = c;
1665 s_new++;
1666 }
1667 return new;
1668}
1669
1670
1671static char translate__doc__[] =
1672"S.translate(table [,deletechars]) -> string\n\
1673\n\
1674Return a copy of the string S, where all characters occurring\n\
1675in the optional argument deletechars are removed, and the\n\
1676remaining characters have been mapped through the given\n\
1677translation table, which must be a string of length 256.";
1678
1679static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001680string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001681{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001682 register char *input, *output;
1683 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684 register int i, c, changed = 0;
1685 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001686 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687 int inlen, tablen, dellen = 0;
1688 PyObject *result;
1689 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001690 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691
Guido van Rossum4c08d552000-03-10 22:55:18 +00001692 if (!PyArg_ParseTuple(args, "O|O:translate",
1693 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001695
1696 if (PyString_Check(tableobj)) {
1697 table1 = PyString_AS_STRING(tableobj);
1698 tablen = PyString_GET_SIZE(tableobj);
1699 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001700#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001701 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001702 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703 parameter; instead a mapping to None will cause characters
1704 to be deleted. */
1705 if (delobj != NULL) {
1706 PyErr_SetString(PyExc_TypeError,
1707 "deletions are implemented differently for unicode");
1708 return NULL;
1709 }
1710 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1711 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001712#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001713 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001715
1716 if (delobj != NULL) {
1717 if (PyString_Check(delobj)) {
1718 del_table = PyString_AS_STRING(delobj);
1719 dellen = PyString_GET_SIZE(delobj);
1720 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001721#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001722 else if (PyUnicode_Check(delobj)) {
1723 PyErr_SetString(PyExc_TypeError,
1724 "deletions are implemented differently for unicode");
1725 return NULL;
1726 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001727#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001728 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1729 return NULL;
1730
1731 if (tablen != 256) {
1732 PyErr_SetString(PyExc_ValueError,
1733 "translation table must be 256 characters long");
1734 return NULL;
1735 }
1736 }
1737 else {
1738 del_table = NULL;
1739 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 }
1741
1742 table = table1;
1743 inlen = PyString_Size(input_obj);
1744 result = PyString_FromStringAndSize((char *)NULL, inlen);
1745 if (result == NULL)
1746 return NULL;
1747 output_start = output = PyString_AsString(result);
1748 input = PyString_AsString(input_obj);
1749
1750 if (dellen == 0) {
1751 /* If no deletions are required, use faster code */
1752 for (i = inlen; --i >= 0; ) {
1753 c = Py_CHARMASK(*input++);
1754 if (Py_CHARMASK((*output++ = table[c])) != c)
1755 changed = 1;
1756 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001757 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 return result;
1759 Py_DECREF(result);
1760 Py_INCREF(input_obj);
1761 return input_obj;
1762 }
1763
1764 for (i = 0; i < 256; i++)
1765 trans_table[i] = Py_CHARMASK(table[i]);
1766
1767 for (i = 0; i < dellen; i++)
1768 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1769
1770 for (i = inlen; --i >= 0; ) {
1771 c = Py_CHARMASK(*input++);
1772 if (trans_table[c] != -1)
1773 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1774 continue;
1775 changed = 1;
1776 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001777 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778 Py_DECREF(result);
1779 Py_INCREF(input_obj);
1780 return input_obj;
1781 }
1782 /* Fix the size of the resulting string */
1783 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1784 return NULL;
1785 return result;
1786}
1787
1788
1789/* What follows is used for implementing replace(). Perry Stoll. */
1790
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first place where the PAT_LEN
  bytes at PAT occur.  Returns the index of that place within MEM, or
  -1 when there is none.  A pattern longer than MEM is never found,
  so -1 is returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest index at which a full copy of PAT still fits in MEM. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before paying for the full compare. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1816
/*
   mymemcnt

   Return the number of non-overlapping occurrences of PAT in MEM,
   found by scanning from left to right:
   mem=1111 and pat==11 gives 2;
   mem=11111 and pat==11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    while (len >= 0) {
        const int where = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (where == -1)
            break;
        /* Resume the search right behind the match just found. */
        consumed = where + pat_len;
        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1840
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means replace all of them.

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if no replacement takes place, the original string is
   returned.  Otherwise, a new string is allocated here and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       does not fit in an int).

   return value is:
       the new string allocated locally with PyMem_MALLOC (the caller
       releases it), or
       STR itself when out_len is -1, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Each replacement grows the result by sub_len - pat_len bytes
       when SUB is longer than PAT; refuse up front if the total would
       overflow an int instead of computing a bogus length. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1925
1926
1927static char replace__doc__[] =
1928"S.replace (old, new[, maxsplit]) -> string\n\
1929\n\
1930Return a copy of string S with all occurrences of substring\n\
1931old replaced by new. If the optional argument maxsplit is\n\
1932given, only the first maxsplit occurrences are replaced.";
1933
1934static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001935string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001937 const char *str = PyString_AS_STRING(self), *sub, *repl;
1938 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001939 const int len = PyString_GET_SIZE(self);
1940 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001941 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001943 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944
Guido van Rossum4c08d552000-03-10 22:55:18 +00001945 if (!PyArg_ParseTuple(args, "OO|i:replace",
1946 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001948
1949 if (PyString_Check(subobj)) {
1950 sub = PyString_AS_STRING(subobj);
1951 sub_len = PyString_GET_SIZE(subobj);
1952 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001953#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001954 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001955 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001956 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001957#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001958 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1959 return NULL;
1960
1961 if (PyString_Check(replobj)) {
1962 repl = PyString_AS_STRING(replobj);
1963 repl_len = PyString_GET_SIZE(replobj);
1964 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001965#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001966 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001967 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001968 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001969#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001970 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1971 return NULL;
1972
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001973 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001974 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975 return NULL;
1976 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001977 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978 if (new_s == NULL) {
1979 PyErr_NoMemory();
1980 return NULL;
1981 }
1982 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001983 if (PyString_CheckExact(self)) {
1984 /* we're returning another reference to self */
1985 new = (PyObject*)self;
1986 Py_INCREF(new);
1987 }
1988 else {
1989 new = PyString_FromStringAndSize(str, len);
1990 if (new == NULL)
1991 return NULL;
1992 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993 }
1994 else {
1995 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001996 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997 }
1998 return new;
1999}
2000
2001
2002static char startswith__doc__[] =
2003"S.startswith(prefix[, start[, end]]) -> int\n\
2004\n\
2005Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
2006optional start, test S beginning at that position. With optional end, stop\n\
2007comparing S at that position.";
2008
2009static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002010string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002012 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002014 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015 int plen;
2016 int start = 0;
2017 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002018 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019
Guido van Rossumc6821402000-05-08 14:08:05 +00002020 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2021 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002022 return NULL;
2023 if (PyString_Check(subobj)) {
2024 prefix = PyString_AS_STRING(subobj);
2025 plen = PyString_GET_SIZE(subobj);
2026 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002027#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002028 else if (PyUnicode_Check(subobj)) {
2029 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002030 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002031 subobj, start, end, -1);
2032 if (rc == -1)
2033 return NULL;
2034 else
2035 return PyInt_FromLong((long) rc);
2036 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002037#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002038 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039 return NULL;
2040
2041 /* adopt Java semantics for index out of range. it is legal for
2042 * offset to be == plen, but this only returns true if prefix is
2043 * the empty string.
2044 */
2045 if (start < 0 || start+plen > len)
2046 return PyInt_FromLong(0);
2047
2048 if (!memcmp(str+start, prefix, plen)) {
2049 /* did the match end after the specified end? */
2050 if (end < 0)
2051 return PyInt_FromLong(1);
2052 else if (end - start < plen)
2053 return PyInt_FromLong(0);
2054 else
2055 return PyInt_FromLong(1);
2056 }
2057 else return PyInt_FromLong(0);
2058}
2059
2060
2061static char endswith__doc__[] =
2062"S.endswith(suffix[, start[, end]]) -> int\n\
2063\n\
2064Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2065optional start, test S beginning at that position. With optional end, stop\n\
2066comparing S at that position.";
2067
2068static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002069string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002073 const char* suffix;
2074 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075 int start = 0;
2076 int end = -1;
2077 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079
Guido van Rossumc6821402000-05-08 14:08:05 +00002080 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2081 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002082 return NULL;
2083 if (PyString_Check(subobj)) {
2084 suffix = PyString_AS_STRING(subobj);
2085 slen = PyString_GET_SIZE(subobj);
2086 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002087#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002088 else if (PyUnicode_Check(subobj)) {
2089 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002090 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002091 subobj, start, end, +1);
2092 if (rc == -1)
2093 return NULL;
2094 else
2095 return PyInt_FromLong((long) rc);
2096 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002097#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099 return NULL;
2100
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102 return PyInt_FromLong(0);
2103
2104 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002105 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108 return PyInt_FromLong(1);
2109 else return PyInt_FromLong(0);
2110}
2111
2112
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002113static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002114"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002115\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002116Encodes S using the codec registered for encoding. encoding defaults\n\
2117to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002118handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2119a ValueError. Other possible values are 'ignore' and 'replace'.";
2120
2121static PyObject *
2122string_encode(PyStringObject *self, PyObject *args)
2123{
2124 char *encoding = NULL;
2125 char *errors = NULL;
2126 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2127 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002128 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2129}
2130
2131
2132static char decode__doc__[] =
2133"S.decode([encoding[,errors]]) -> object\n\
2134\n\
2135Decodes S using the codec registered for encoding. encoding defaults\n\
2136to the default encoding. errors may be given to set a different error\n\
2137handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2138a ValueError. Other possible values are 'ignore' and 'replace'.";
2139
2140static PyObject *
2141string_decode(PyStringObject *self, PyObject *args)
2142{
2143 char *encoding = NULL;
2144 char *errors = NULL;
2145 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2146 return NULL;
2147 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002148}
2149
2150
Guido van Rossum4c08d552000-03-10 22:55:18 +00002151static char expandtabs__doc__[] =
2152"S.expandtabs([tabsize]) -> string\n\
2153\n\
2154Return a copy of S where all tab characters are expanded using spaces.\n\
2155If tabsize is not given, a tab size of 8 characters is assumed.";
2156
2157static PyObject*
2158string_expandtabs(PyStringObject *self, PyObject *args)
2159{
2160 const char *e, *p;
2161 char *q;
2162 int i, j;
2163 PyObject *u;
2164 int tabsize = 8;
2165
2166 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2167 return NULL;
2168
Thomas Wouters7e474022000-07-16 12:04:32 +00002169 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002170 i = j = 0;
2171 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2172 for (p = PyString_AS_STRING(self); p < e; p++)
2173 if (*p == '\t') {
2174 if (tabsize > 0)
2175 j += tabsize - (j % tabsize);
2176 }
2177 else {
2178 j++;
2179 if (*p == '\n' || *p == '\r') {
2180 i += j;
2181 j = 0;
2182 }
2183 }
2184
2185 /* Second pass: create output string and fill it */
2186 u = PyString_FromStringAndSize(NULL, i + j);
2187 if (!u)
2188 return NULL;
2189
2190 j = 0;
2191 q = PyString_AS_STRING(u);
2192
2193 for (p = PyString_AS_STRING(self); p < e; p++)
2194 if (*p == '\t') {
2195 if (tabsize > 0) {
2196 i = tabsize - (j % tabsize);
2197 j += i;
2198 while (i--)
2199 *q++ = ' ';
2200 }
2201 }
2202 else {
2203 j++;
2204 *q++ = *p;
2205 if (*p == '\n' || *p == '\r')
2206 j = 0;
2207 }
2208
2209 return u;
2210}
2211
Tim Peters8fa5dd02001-09-12 02:18:30 +00002212static PyObject *
2213pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002214{
2215 PyObject *u;
2216
2217 if (left < 0)
2218 left = 0;
2219 if (right < 0)
2220 right = 0;
2221
Tim Peters8fa5dd02001-09-12 02:18:30 +00002222 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002223 Py_INCREF(self);
2224 return (PyObject *)self;
2225 }
2226
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002227 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 left + PyString_GET_SIZE(self) + right);
2229 if (u) {
2230 if (left)
2231 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002232 memcpy(PyString_AS_STRING(u) + left,
2233 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002234 PyString_GET_SIZE(self));
2235 if (right)
2236 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2237 fill, right);
2238 }
2239
2240 return u;
2241}
2242
2243static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002244"S.ljust(width) -> string\n"
2245"\n"
2246"Return S left justified in a string of length width. Padding is\n"
2247"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002248
2249static PyObject *
2250string_ljust(PyStringObject *self, PyObject *args)
2251{
2252 int width;
2253 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2254 return NULL;
2255
Tim Peters8fa5dd02001-09-12 02:18:30 +00002256 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257 Py_INCREF(self);
2258 return (PyObject*) self;
2259 }
2260
2261 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2262}
2263
2264
2265static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002266"S.rjust(width) -> string\n"
2267"\n"
2268"Return S right justified in a string of length width. Padding is\n"
2269"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002270
2271static PyObject *
2272string_rjust(PyStringObject *self, PyObject *args)
2273{
2274 int width;
2275 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2276 return NULL;
2277
Tim Peters8fa5dd02001-09-12 02:18:30 +00002278 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279 Py_INCREF(self);
2280 return (PyObject*) self;
2281 }
2282
2283 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2284}
2285
2286
2287static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002288"S.center(width) -> string\n"
2289"\n"
2290"Return S centered in a string of length width. Padding is done\n"
2291"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002292
2293static PyObject *
2294string_center(PyStringObject *self, PyObject *args)
2295{
2296 int marg, left;
2297 int width;
2298
2299 if (!PyArg_ParseTuple(args, "i:center", &width))
2300 return NULL;
2301
Tim Peters8fa5dd02001-09-12 02:18:30 +00002302 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 Py_INCREF(self);
2304 return (PyObject*) self;
2305 }
2306
2307 marg = width - PyString_GET_SIZE(self);
2308 left = marg / 2 + (marg & width & 1);
2309
2310 return pad(self, left, marg - left, ' ');
2311}
2312
Guido van Rossum4c08d552000-03-10 22:55:18 +00002313static char isspace__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002314"S.isspace() -> int\n"
2315"\n"
2316"Return 1 if there are only whitespace characters in S,\n"
2317"0 otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002318
2319static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002320string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002321{
Fred Drakeba096332000-07-09 07:04:36 +00002322 register const unsigned char *p
2323 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002324 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002325
Guido van Rossum4c08d552000-03-10 22:55:18 +00002326 /* Shortcut for single character strings */
2327 if (PyString_GET_SIZE(self) == 1 &&
2328 isspace(*p))
2329 return PyInt_FromLong(1);
2330
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002331 /* Special case for empty strings */
2332 if (PyString_GET_SIZE(self) == 0)
2333 return PyInt_FromLong(0);
2334
Guido van Rossum4c08d552000-03-10 22:55:18 +00002335 e = p + PyString_GET_SIZE(self);
2336 for (; p < e; p++) {
2337 if (!isspace(*p))
2338 return PyInt_FromLong(0);
2339 }
2340 return PyInt_FromLong(1);
2341}
2342
2343
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002344static char isalpha__doc__[] =
2345"S.isalpha() -> int\n\
2346\n\
2347Return 1 if all characters in S are alphabetic\n\
2348and there is at least one character in S, 0 otherwise.";
2349
2350static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002351string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002352{
Fred Drakeba096332000-07-09 07:04:36 +00002353 register const unsigned char *p
2354 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002355 register const unsigned char *e;
2356
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002357 /* Shortcut for single character strings */
2358 if (PyString_GET_SIZE(self) == 1 &&
2359 isalpha(*p))
2360 return PyInt_FromLong(1);
2361
2362 /* Special case for empty strings */
2363 if (PyString_GET_SIZE(self) == 0)
2364 return PyInt_FromLong(0);
2365
2366 e = p + PyString_GET_SIZE(self);
2367 for (; p < e; p++) {
2368 if (!isalpha(*p))
2369 return PyInt_FromLong(0);
2370 }
2371 return PyInt_FromLong(1);
2372}
2373
2374
2375static char isalnum__doc__[] =
2376"S.isalnum() -> int\n\
2377\n\
2378Return 1 if all characters in S are alphanumeric\n\
2379and there is at least one character in S, 0 otherwise.";
2380
2381static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002382string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002383{
Fred Drakeba096332000-07-09 07:04:36 +00002384 register const unsigned char *p
2385 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002386 register const unsigned char *e;
2387
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002388 /* Shortcut for single character strings */
2389 if (PyString_GET_SIZE(self) == 1 &&
2390 isalnum(*p))
2391 return PyInt_FromLong(1);
2392
2393 /* Special case for empty strings */
2394 if (PyString_GET_SIZE(self) == 0)
2395 return PyInt_FromLong(0);
2396
2397 e = p + PyString_GET_SIZE(self);
2398 for (; p < e; p++) {
2399 if (!isalnum(*p))
2400 return PyInt_FromLong(0);
2401 }
2402 return PyInt_FromLong(1);
2403}
2404
2405
Guido van Rossum4c08d552000-03-10 22:55:18 +00002406static char isdigit__doc__[] =
2407"S.isdigit() -> int\n\
2408\n\
2409Return 1 if there are only digit characters in S,\n\
24100 otherwise.";
2411
2412static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002413string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002414{
Fred Drakeba096332000-07-09 07:04:36 +00002415 register const unsigned char *p
2416 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002417 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002418
Guido van Rossum4c08d552000-03-10 22:55:18 +00002419 /* Shortcut for single character strings */
2420 if (PyString_GET_SIZE(self) == 1 &&
2421 isdigit(*p))
2422 return PyInt_FromLong(1);
2423
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002424 /* Special case for empty strings */
2425 if (PyString_GET_SIZE(self) == 0)
2426 return PyInt_FromLong(0);
2427
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428 e = p + PyString_GET_SIZE(self);
2429 for (; p < e; p++) {
2430 if (!isdigit(*p))
2431 return PyInt_FromLong(0);
2432 }
2433 return PyInt_FromLong(1);
2434}
2435
2436
2437static char islower__doc__[] =
2438"S.islower() -> int\n\
2439\n\
2440Return 1 if all cased characters in S are lowercase and there is\n\
2441at least one cased character in S, 0 otherwise.";
2442
2443static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002444string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002445{
Fred Drakeba096332000-07-09 07:04:36 +00002446 register const unsigned char *p
2447 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002448 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449 int cased;
2450
Guido van Rossum4c08d552000-03-10 22:55:18 +00002451 /* Shortcut for single character strings */
2452 if (PyString_GET_SIZE(self) == 1)
2453 return PyInt_FromLong(islower(*p) != 0);
2454
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002455 /* Special case for empty strings */
2456 if (PyString_GET_SIZE(self) == 0)
2457 return PyInt_FromLong(0);
2458
Guido van Rossum4c08d552000-03-10 22:55:18 +00002459 e = p + PyString_GET_SIZE(self);
2460 cased = 0;
2461 for (; p < e; p++) {
2462 if (isupper(*p))
2463 return PyInt_FromLong(0);
2464 else if (!cased && islower(*p))
2465 cased = 1;
2466 }
2467 return PyInt_FromLong(cased);
2468}
2469
2470
2471static char isupper__doc__[] =
2472"S.isupper() -> int\n\
2473\n\
2474Return 1 if all cased characters in S are uppercase and there is\n\
2475at least one cased character in S, 0 otherwise.";
2476
2477static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002478string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002479{
Fred Drakeba096332000-07-09 07:04:36 +00002480 register const unsigned char *p
2481 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002482 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002483 int cased;
2484
Guido van Rossum4c08d552000-03-10 22:55:18 +00002485 /* Shortcut for single character strings */
2486 if (PyString_GET_SIZE(self) == 1)
2487 return PyInt_FromLong(isupper(*p) != 0);
2488
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002489 /* Special case for empty strings */
2490 if (PyString_GET_SIZE(self) == 0)
2491 return PyInt_FromLong(0);
2492
Guido van Rossum4c08d552000-03-10 22:55:18 +00002493 e = p + PyString_GET_SIZE(self);
2494 cased = 0;
2495 for (; p < e; p++) {
2496 if (islower(*p))
2497 return PyInt_FromLong(0);
2498 else if (!cased && isupper(*p))
2499 cased = 1;
2500 }
2501 return PyInt_FromLong(cased);
2502}
2503
2504
2505static char istitle__doc__[] =
2506"S.istitle() -> int\n\
2507\n\
2508Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2509may only follow uncased characters and lowercase characters only cased\n\
2510ones. Return 0 otherwise.";
2511
2512static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002513string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002514{
Fred Drakeba096332000-07-09 07:04:36 +00002515 register const unsigned char *p
2516 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002517 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002518 int cased, previous_is_cased;
2519
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520 /* Shortcut for single character strings */
2521 if (PyString_GET_SIZE(self) == 1)
2522 return PyInt_FromLong(isupper(*p) != 0);
2523
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002524 /* Special case for empty strings */
2525 if (PyString_GET_SIZE(self) == 0)
2526 return PyInt_FromLong(0);
2527
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 e = p + PyString_GET_SIZE(self);
2529 cased = 0;
2530 previous_is_cased = 0;
2531 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002532 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533
2534 if (isupper(ch)) {
2535 if (previous_is_cased)
2536 return PyInt_FromLong(0);
2537 previous_is_cased = 1;
2538 cased = 1;
2539 }
2540 else if (islower(ch)) {
2541 if (!previous_is_cased)
2542 return PyInt_FromLong(0);
2543 previous_is_cased = 1;
2544 cased = 1;
2545 }
2546 else
2547 previous_is_cased = 0;
2548 }
2549 return PyInt_FromLong(cased);
2550}
2551
2552
2553static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002554"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002555\n\
2556Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002557Line breaks are not included in the resulting list unless keepends\n\
2558is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002559
2560#define SPLIT_APPEND(data, left, right) \
2561 str = PyString_FromStringAndSize(data + left, right - left); \
2562 if (!str) \
2563 goto onError; \
2564 if (PyList_Append(list, str)) { \
2565 Py_DECREF(str); \
2566 goto onError; \
2567 } \
2568 else \
2569 Py_DECREF(str);
2570
2571static PyObject*
2572string_splitlines(PyStringObject *self, PyObject *args)
2573{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002574 register int i;
2575 register int j;
2576 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002577 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002578 PyObject *list;
2579 PyObject *str;
2580 char *data;
2581
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002582 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002583 return NULL;
2584
2585 data = PyString_AS_STRING(self);
2586 len = PyString_GET_SIZE(self);
2587
Guido van Rossum4c08d552000-03-10 22:55:18 +00002588 list = PyList_New(0);
2589 if (!list)
2590 goto onError;
2591
2592 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002593 int eol;
2594
Guido van Rossum4c08d552000-03-10 22:55:18 +00002595 /* Find a line and append it */
2596 while (i < len && data[i] != '\n' && data[i] != '\r')
2597 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002598
2599 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002600 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002601 if (i < len) {
2602 if (data[i] == '\r' && i + 1 < len &&
2603 data[i+1] == '\n')
2604 i += 2;
2605 else
2606 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002607 if (keepends)
2608 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002609 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002610 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611 j = i;
2612 }
2613 if (j < len) {
2614 SPLIT_APPEND(data, j, len);
2615 }
2616
2617 return list;
2618
2619 onError:
2620 Py_DECREF(list);
2621 return NULL;
2622}
2623
2624#undef SPLIT_APPEND
2625
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002626
/* Method table for string objects.  Each entry gives the method name, the
   C function implementing it, the calling convention flag (METH_NOARGS,
   METH_O or METH_VARARGS) and the docstring.  The table ends with a
   {NULL, NULL} sentinel entry. */
static PyMethodDef
string_methods[] = {
	/* Counterparts of the obsolete stropmodule functions; except
	   string.maketrans(). */
	{"join", (PyCFunction)string_join, METH_O, join__doc__},
	{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
	{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
	{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
	{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
	{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
	{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
	{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
	{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
	{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
	{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
	{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
	{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
	{"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
	{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
	{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
	{"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
	{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
	{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
	{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
	{"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
	{"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
	{"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
	{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
	{"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
	{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
	{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
	{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
	{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
	{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
	{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
	{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
	{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
	/* zfill is compiled out: the entry below is disabled with #if 0. */
#if 0
	{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
#endif
	{NULL, NULL} /* sentinel */
};
2669
Guido van Rossumae960af2001-08-30 03:11:59 +00002670staticforward PyObject *
2671str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2672
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002673static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002674string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002675{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002676 PyObject *x = NULL;
2677 static char *kwlist[] = {"object", 0};
2678
Guido van Rossumae960af2001-08-30 03:11:59 +00002679 if (type != &PyString_Type)
2680 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002681 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2682 return NULL;
2683 if (x == NULL)
2684 return PyString_FromString("");
2685 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002686}
2687
Guido van Rossumae960af2001-08-30 03:11:59 +00002688static PyObject *
2689str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2690{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002691 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002692 int n;
2693
2694 assert(PyType_IsSubtype(type, &PyString_Type));
2695 tmp = string_new(&PyString_Type, args, kwds);
2696 if (tmp == NULL)
2697 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002698 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002699 n = PyString_GET_SIZE(tmp);
2700 pnew = type->tp_alloc(type, n);
2701 if (pnew != NULL) {
2702 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002703 ((PyStringObject *)pnew)->ob_shash =
2704 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002705 ((PyStringObject *)pnew)->ob_sinterned =
2706 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002707 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002708 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002709 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002710}
2711
/* Docstring installed as the tp_doc slot of PyString_Type below. */
static char string_doc[] =
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.";

/* Type object for "str".  The initializer is positional: every entry must
   stay in slot order.  Py_TPFLAGS_BASETYPE is set in tp_flags, and tp_new
   is string_new(). */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"str",					/* tp_name */
	sizeof(PyStringObject),			/* tp_basicsize */
	sizeof(char),				/* tp_itemsize */
	(destructor)string_dealloc,		/* tp_dealloc */
	(printfunc)string_print,		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	(reprfunc)string_repr,			/* tp_repr */
	0,					/* tp_as_number */
	&string_as_sequence,			/* tp_as_sequence */
	0,					/* tp_as_mapping */
	(hashfunc)string_hash,			/* tp_hash */
	0,					/* tp_call */
	(reprfunc)string_str,			/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	&string_as_buffer,			/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
	string_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	(richcmpfunc)string_richcompare,	/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	string_methods,				/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	0,					/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	string_new,				/* tp_new */
	_PyMalloc_Del,				/* tp_free */
};
2760
2761void
Fred Drakeba096332000-07-09 07:04:36 +00002762PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002763{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002764 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002765 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002766 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002767 if (w == NULL || !PyString_Check(*pv)) {
2768 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002769 *pv = NULL;
2770 return;
2771 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002772 v = string_concat((PyStringObject *) *pv, w);
2773 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002774 *pv = v;
2775}
2776
Guido van Rossum013142a1994-08-30 08:19:36 +00002777void
Fred Drakeba096332000-07-09 07:04:36 +00002778PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002779{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002780 PyString_Concat(pv, w);
2781 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002782}
2783
2784
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002785/* The following function breaks the notion that strings are immutable:
2786 it changes the size of a string. We get away with this only if there
2787 is only one module referencing the object. You can also think of it
2788 as creating a new string object and destroying the old one, only
2789 more efficiently. In any case, don't use this if the string may
2790 already be known to some other part of the code... */
2791
2792int
Fred Drakeba096332000-07-09 07:04:36 +00002793_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002794{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002795 register PyObject *v;
2796 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002797 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002799 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002800 Py_DECREF(v);
2801 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002802 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002803 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002804 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002805#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002806 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002807#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002808 _Py_ForgetReference(v);
2809 *pv = (PyObject *)
Neil Schemenauerdcc819a2002-03-22 15:33:15 +00002810 _PyMalloc_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002811 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002812 if (*pv == NULL) {
Neil Schemenauerdcc819a2002-03-22 15:33:15 +00002813 PyMalloc_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002814 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002815 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002816 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002817 _Py_NewReference(*pv);
2818 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002819 sv->ob_size = newsize;
2820 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002821 return 0;
2822}
Guido van Rossume5372401993-03-16 12:15:04 +00002823
2824/* Helpers for formatstring */
2825
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002826static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002827getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002828{
2829 int argidx = *p_argidx;
2830 if (argidx < arglen) {
2831 (*p_argidx)++;
2832 if (arglen < 0)
2833 return args;
2834 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002835 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002836 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002837 PyErr_SetString(PyExc_TypeError,
2838 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002839 return NULL;
2840}
2841
/* Format codes
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 *
 * Each flag character of a format specifier maps to one bit below, so
 * several flags can be combined in a single int `flags' value and
 * tested with `flags & F_xxx'.
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
2854
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002855static int
Fred Drakeba096332000-07-09 07:04:36 +00002856formatfloat(char *buf, size_t buflen, int flags,
2857 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002858{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002859 /* fmt = '%#.' + `prec` + `type`
2860 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002861 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002862 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002863 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002864 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002865 if (prec < 0)
2866 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002867 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2868 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00002869 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
2870 (flags&F_ALT) ? "#" : "",
2871 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002872 /* worst case length calc to ensure no buffer overrun:
2873 fmt = %#.<prec>g
2874 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002875 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002876 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2877 If prec=0 the effective precision is 1 (the leading digit is
2878 always given), therefore increase by one to 10+prec. */
2879 if (buflen <= (size_t)10 + (size_t)prec) {
2880 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002881 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002882 return -1;
2883 }
Tim Peters885d4572001-11-28 20:27:42 +00002884 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002885 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002886}
2887
Tim Peters38fd5b62000-09-21 05:43:11 +00002888/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2889 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2890 * Python's regular ints.
2891 * Return value: a new PyString*, or NULL if error.
2892 * . *pbuf is set to point into it,
2893 * *plen set to the # of chars following that.
2894 * Caller must decref it when done using pbuf.
2895 * The string starting at *pbuf is of the form
2896 * "-"? ("0x" | "0X")? digit+
2897 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002898 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002899 * There will be at least prec digits, zero-filled on the left if
2900 * necessary to get that many.
2901 * val object to be converted
2902 * flags bitmask of format flags; only F_ALT is looked at
2903 * prec minimum number of digits; 0-fill on left if needed
2904 * type a character in [duoxX]; u acts the same as d
2905 *
2906 * CAUTION: o, x and X conversions on regular ints can never
2907 * produce a '-' sign, but can for Python's unbounded ints.
2908 */
2909PyObject*
2910_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2911 char **pbuf, int *plen)
2912{
2913 PyObject *result = NULL;
2914 char *buf;
2915 int i;
2916 int sign; /* 1 if '-', else 0 */
2917 int len; /* number of characters */
2918 int numdigits; /* len == numnondigits + numdigits */
2919 int numnondigits = 0;
2920
2921 switch (type) {
2922 case 'd':
2923 case 'u':
2924 result = val->ob_type->tp_str(val);
2925 break;
2926 case 'o':
2927 result = val->ob_type->tp_as_number->nb_oct(val);
2928 break;
2929 case 'x':
2930 case 'X':
2931 numnondigits = 2;
2932 result = val->ob_type->tp_as_number->nb_hex(val);
2933 break;
2934 default:
2935 assert(!"'type' not in [duoxX]");
2936 }
2937 if (!result)
2938 return NULL;
2939
2940 /* To modify the string in-place, there can only be one reference. */
2941 if (result->ob_refcnt != 1) {
2942 PyErr_BadInternalCall();
2943 return NULL;
2944 }
2945 buf = PyString_AsString(result);
2946 len = PyString_Size(result);
2947 if (buf[len-1] == 'L') {
2948 --len;
2949 buf[len] = '\0';
2950 }
2951 sign = buf[0] == '-';
2952 numnondigits += sign;
2953 numdigits = len - numnondigits;
2954 assert(numdigits > 0);
2955
Tim Petersfff53252001-04-12 18:38:48 +00002956 /* Get rid of base marker unless F_ALT */
2957 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002958 /* Need to skip 0x, 0X or 0. */
2959 int skipped = 0;
2960 switch (type) {
2961 case 'o':
2962 assert(buf[sign] == '0');
2963 /* If 0 is only digit, leave it alone. */
2964 if (numdigits > 1) {
2965 skipped = 1;
2966 --numdigits;
2967 }
2968 break;
2969 case 'x':
2970 case 'X':
2971 assert(buf[sign] == '0');
2972 assert(buf[sign + 1] == 'x');
2973 skipped = 2;
2974 numnondigits -= 2;
2975 break;
2976 }
2977 if (skipped) {
2978 buf += skipped;
2979 len -= skipped;
2980 if (sign)
2981 buf[0] = '-';
2982 }
2983 assert(len == numnondigits + numdigits);
2984 assert(numdigits > 0);
2985 }
2986
2987 /* Fill with leading zeroes to meet minimum width. */
2988 if (prec > numdigits) {
2989 PyObject *r1 = PyString_FromStringAndSize(NULL,
2990 numnondigits + prec);
2991 char *b1;
2992 if (!r1) {
2993 Py_DECREF(result);
2994 return NULL;
2995 }
2996 b1 = PyString_AS_STRING(r1);
2997 for (i = 0; i < numnondigits; ++i)
2998 *b1++ = *buf++;
2999 for (i = 0; i < prec - numdigits; i++)
3000 *b1++ = '0';
3001 for (i = 0; i < numdigits; i++)
3002 *b1++ = *buf++;
3003 *b1 = '\0';
3004 Py_DECREF(result);
3005 result = r1;
3006 buf = PyString_AS_STRING(result);
3007 len = numnondigits + prec;
3008 }
3009
3010 /* Fix up case for hex conversions. */
3011 switch (type) {
3012 case 'x':
3013 /* Need to convert all upper case letters to lower case. */
3014 for (i = 0; i < len; i++)
3015 if (buf[i] >= 'A' && buf[i] <= 'F')
3016 buf[i] += 'a'-'A';
3017 break;
3018 case 'X':
3019 /* Need to convert 0x to 0X (and -0x to -0X). */
3020 if (buf[sign + 1] == 'x')
3021 buf[sign + 1] = 'X';
3022 break;
3023 }
3024 *pbuf = buf;
3025 *plen = len;
3026 return result;
3027}
3028
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003029static int
Fred Drakeba096332000-07-09 07:04:36 +00003030formatint(char *buf, size_t buflen, int flags,
3031 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003032{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003033 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003034 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3035 + 1 + 1 = 24 */
3036 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003037 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003038
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003039 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003040 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003041 if (prec < 0)
3042 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003043
3044 if ((flags & F_ALT) &&
3045 (type == 'x' || type == 'X')) {
3046 /* When converting under %#x or %#X, there are a number
3047 * of issues that cause pain:
3048 * - when 0 is being converted, the C standard leaves off
3049 * the '0x' or '0X', which is inconsistent with other
3050 * %#x/%#X conversions and inconsistent with Python's
3051 * hex() function
3052 * - there are platforms that violate the standard and
3053 * convert 0 with the '0x' or '0X'
3054 * (Metrowerks, Compaq Tru64)
3055 * - there are platforms that give '0x' when converting
3056 * under %#X, but convert 0 in accordance with the
3057 * standard (OS/2 EMX)
3058 *
3059 * We can achieve the desired consistency by inserting our
3060 * own '0x' or '0X' prefix, and substituting %x/%X in place
3061 * of %#x/%#X.
3062 *
3063 * Note that this is the same approach as used in
3064 * formatint() in unicodeobject.c
3065 */
3066 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
3067 type, prec, type);
3068 }
3069 else {
3070 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3071 (flags&F_ALT) ? "#" : "",
3072 prec, type);
3073 }
3074
Tim Peters38fd5b62000-09-21 05:43:11 +00003075 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003076 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3077 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003078 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003079 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003080 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003081 return -1;
3082 }
Tim Peters885d4572001-11-28 20:27:42 +00003083 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003084 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003085}
3086
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003087static int
Fred Drakeba096332000-07-09 07:04:36 +00003088formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003089{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003090 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003091 if (PyString_Check(v)) {
3092 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003093 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003094 }
3095 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003096 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003097 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003098 }
3099 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003100 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003101}
3102
Guido van Rossum013142a1994-08-30 08:19:36 +00003103
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The value is cast to size_t so it can be passed directly as the
   `buflen' argument of the format*() helpers.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003113
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003114PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003115PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003116{
3117 char *fmt, *res;
3118 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003119 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003120 PyObject *result, *orig_args;
3121#ifdef Py_USING_UNICODE
3122 PyObject *v, *w;
3123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003124 PyObject *dict = NULL;
3125 if (format == NULL || !PyString_Check(format) || args == NULL) {
3126 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003127 return NULL;
3128 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003129 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003130 fmt = PyString_AS_STRING(format);
3131 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003132 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003133 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003134 if (result == NULL)
3135 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003136 res = PyString_AsString(result);
3137 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003138 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003139 argidx = 0;
3140 }
3141 else {
3142 arglen = -1;
3143 argidx = -2;
3144 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003145 if (args->ob_type->tp_as_mapping)
3146 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003147 while (--fmtcnt >= 0) {
3148 if (*fmt != '%') {
3149 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003150 rescnt = fmtcnt + 100;
3151 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003152 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003153 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003154 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003155 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003156 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003157 }
3158 *res++ = *fmt++;
3159 }
3160 else {
3161 /* Got a format specifier */
3162 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003163 int width = -1;
3164 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003165 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003166 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003167 PyObject *v = NULL;
3168 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003169 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003170 int sign;
3171 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003172 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003173#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003174 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003175 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003176#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003177
Guido van Rossumda9c2711996-12-05 21:58:58 +00003178 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003179 if (*fmt == '(') {
3180 char *keystart;
3181 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003182 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003183 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003184
3185 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003186 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003187 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003188 goto error;
3189 }
3190 ++fmt;
3191 --fmtcnt;
3192 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003193 /* Skip over balanced parentheses */
3194 while (pcount > 0 && --fmtcnt >= 0) {
3195 if (*fmt == ')')
3196 --pcount;
3197 else if (*fmt == '(')
3198 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003199 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003200 }
3201 keylen = fmt - keystart - 1;
3202 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003203 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003204 "incomplete format key");
3205 goto error;
3206 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003207 key = PyString_FromStringAndSize(keystart,
3208 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003209 if (key == NULL)
3210 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003211 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003212 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003213 args_owned = 0;
3214 }
3215 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003216 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003217 if (args == NULL) {
3218 goto error;
3219 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003220 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003221 arglen = -1;
3222 argidx = -2;
3223 }
Guido van Rossume5372401993-03-16 12:15:04 +00003224 while (--fmtcnt >= 0) {
3225 switch (c = *fmt++) {
3226 case '-': flags |= F_LJUST; continue;
3227 case '+': flags |= F_SIGN; continue;
3228 case ' ': flags |= F_BLANK; continue;
3229 case '#': flags |= F_ALT; continue;
3230 case '0': flags |= F_ZERO; continue;
3231 }
3232 break;
3233 }
3234 if (c == '*') {
3235 v = getnextarg(args, arglen, &argidx);
3236 if (v == NULL)
3237 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003238 if (!PyInt_Check(v)) {
3239 PyErr_SetString(PyExc_TypeError,
3240 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003241 goto error;
3242 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003243 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003244 if (width < 0) {
3245 flags |= F_LJUST;
3246 width = -width;
3247 }
Guido van Rossume5372401993-03-16 12:15:04 +00003248 if (--fmtcnt >= 0)
3249 c = *fmt++;
3250 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003251 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003252 width = c - '0';
3253 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003254 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003255 if (!isdigit(c))
3256 break;
3257 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003258 PyErr_SetString(
3259 PyExc_ValueError,
3260 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003261 goto error;
3262 }
3263 width = width*10 + (c - '0');
3264 }
3265 }
3266 if (c == '.') {
3267 prec = 0;
3268 if (--fmtcnt >= 0)
3269 c = *fmt++;
3270 if (c == '*') {
3271 v = getnextarg(args, arglen, &argidx);
3272 if (v == NULL)
3273 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003274 if (!PyInt_Check(v)) {
3275 PyErr_SetString(
3276 PyExc_TypeError,
3277 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003278 goto error;
3279 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003280 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003281 if (prec < 0)
3282 prec = 0;
3283 if (--fmtcnt >= 0)
3284 c = *fmt++;
3285 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003286 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003287 prec = c - '0';
3288 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003289 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003290 if (!isdigit(c))
3291 break;
3292 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003293 PyErr_SetString(
3294 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003295 "prec too big");
3296 goto error;
3297 }
3298 prec = prec*10 + (c - '0');
3299 }
3300 }
3301 } /* prec */
3302 if (fmtcnt >= 0) {
3303 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003304 if (--fmtcnt >= 0)
3305 c = *fmt++;
3306 }
3307 }
3308 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003309 PyErr_SetString(PyExc_ValueError,
3310 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003311 goto error;
3312 }
3313 if (c != '%') {
3314 v = getnextarg(args, arglen, &argidx);
3315 if (v == NULL)
3316 goto error;
3317 }
3318 sign = 0;
3319 fill = ' ';
3320 switch (c) {
3321 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003322 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003323 len = 1;
3324 break;
3325 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003326 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003327#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003328 if (PyUnicode_Check(v)) {
3329 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003330 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003331 goto unicode;
3332 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003333#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003334 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003335 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003336 else
3337 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003338 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003339 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003340 if (!PyString_Check(temp)) {
3341 PyErr_SetString(PyExc_TypeError,
3342 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003343 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003344 goto error;
3345 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003346 pbuf = PyString_AS_STRING(temp);
3347 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003348 if (prec >= 0 && len > prec)
3349 len = prec;
3350 break;
3351 case 'i':
3352 case 'd':
3353 case 'u':
3354 case 'o':
3355 case 'x':
3356 case 'X':
3357 if (c == 'i')
3358 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003359 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003360 temp = _PyString_FormatLong(v, flags,
3361 prec, c, &pbuf, &len);
3362 if (!temp)
3363 goto error;
3364 /* unbounded ints can always produce
3365 a sign character! */
3366 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003367 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003368 else {
3369 pbuf = formatbuf;
3370 len = formatint(pbuf, sizeof(formatbuf),
3371 flags, prec, c, v);
3372 if (len < 0)
3373 goto error;
3374 /* only d conversion is signed */
3375 sign = c == 'd';
3376 }
3377 if (flags & F_ZERO)
3378 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003379 break;
3380 case 'e':
3381 case 'E':
3382 case 'f':
3383 case 'g':
3384 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003385 pbuf = formatbuf;
3386 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003387 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003388 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003389 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003390 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003391 fill = '0';
3392 break;
3393 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003394 pbuf = formatbuf;
3395 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003396 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003397 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003398 break;
3399 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003400 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003401 "unsupported format character '%c' (0x%x) "
3402 "at index %i",
3403 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003404 goto error;
3405 }
3406 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003407 if (*pbuf == '-' || *pbuf == '+') {
3408 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003409 len--;
3410 }
3411 else if (flags & F_SIGN)
3412 sign = '+';
3413 else if (flags & F_BLANK)
3414 sign = ' ';
3415 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003416 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003417 }
3418 if (width < len)
3419 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003420 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003421 reslen -= rescnt;
3422 rescnt = width + fmtcnt + 100;
3423 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003424 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003425 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003426 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003427 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003428 }
3429 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003430 if (fill != ' ')
3431 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003432 rescnt--;
3433 if (width > len)
3434 width--;
3435 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003436 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3437 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003438 assert(pbuf[1] == c);
3439 if (fill != ' ') {
3440 *res++ = *pbuf++;
3441 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003442 }
Tim Petersfff53252001-04-12 18:38:48 +00003443 rescnt -= 2;
3444 width -= 2;
3445 if (width < 0)
3446 width = 0;
3447 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003448 }
3449 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003450 do {
3451 --rescnt;
3452 *res++ = fill;
3453 } while (--width > len);
3454 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003455 if (fill == ' ') {
3456 if (sign)
3457 *res++ = sign;
3458 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003459 (c == 'x' || c == 'X')) {
3460 assert(pbuf[0] == '0');
3461 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003462 *res++ = *pbuf++;
3463 *res++ = *pbuf++;
3464 }
3465 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003466 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003467 res += len;
3468 rescnt -= len;
3469 while (--width >= len) {
3470 --rescnt;
3471 *res++ = ' ';
3472 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003473 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003474 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003475 "not all arguments converted");
3476 goto error;
3477 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003478 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003479 } /* '%' */
3480 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003481 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003482 PyErr_SetString(PyExc_TypeError,
3483 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003484 goto error;
3485 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003486 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003487 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003488 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003489 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003490 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003491
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003492#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003493 unicode:
3494 if (args_owned) {
3495 Py_DECREF(args);
3496 args_owned = 0;
3497 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003498 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003499 if (PyTuple_Check(orig_args) && argidx > 0) {
3500 PyObject *v;
3501 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3502 v = PyTuple_New(n);
3503 if (v == NULL)
3504 goto error;
3505 while (--n >= 0) {
3506 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3507 Py_INCREF(w);
3508 PyTuple_SET_ITEM(v, n, w);
3509 }
3510 args = v;
3511 } else {
3512 Py_INCREF(orig_args);
3513 args = orig_args;
3514 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003515 args_owned = 1;
3516 /* Take what we have of the result and let the Unicode formatting
3517 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003518 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003519 if (_PyString_Resize(&result, rescnt))
3520 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003521 fmtcnt = PyString_GET_SIZE(format) - \
3522 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003523 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3524 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003525 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003526 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003527 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003528 if (v == NULL)
3529 goto error;
3530 /* Paste what we have (result) to what the Unicode formatting
3531 function returned (v) and return the result (or error) */
3532 w = PyUnicode_Concat(result, v);
3533 Py_DECREF(result);
3534 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003535 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003536 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003537#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003538
Guido van Rossume5372401993-03-16 12:15:04 +00003539 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003540 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003541 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003542 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003543 }
Guido van Rossume5372401993-03-16 12:15:04 +00003544 return NULL;
3545}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003546
3547
Guido van Rossum2a61e741997-01-18 07:55:05 +00003548
Barry Warsaw4df762f2000-08-16 23:41:01 +00003549/* This dictionary will leak at PyString_Fini() time. That's acceptable
3550 * because PyString_Fini() specifically frees interned strings that are
3551 * only referenced by this dictionary. The CVS log entry for revision 2.45
3552 * says:
3553 *
3554 * Change the Fini function to only remove otherwise unreferenced
3555 * strings from the interned table. There are references in
3556 * hard-to-find static variables all over the interpreter, and it's not
3557 * worth trying to get rid of all those; but "uninterning" isn't fair
3558 * either and may cause subtle failures later -- so we have to keep them
3559 * in the interned table.
3560 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003561static PyObject *interned;
3562
3563void
Fred Drakeba096332000-07-09 07:04:36 +00003564PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003565{
3566 register PyStringObject *s = (PyStringObject *)(*p);
3567 PyObject *t;
3568 if (s == NULL || !PyString_Check(s))
3569 Py_FatalError("PyString_InternInPlace: strings only please!");
3570 if ((t = s->ob_sinterned) != NULL) {
3571 if (t == (PyObject *)s)
3572 return;
3573 Py_INCREF(t);
3574 *p = t;
3575 Py_DECREF(s);
3576 return;
3577 }
3578 if (interned == NULL) {
3579 interned = PyDict_New();
3580 if (interned == NULL)
3581 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003582 }
3583 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3584 Py_INCREF(t);
3585 *p = s->ob_sinterned = t;
3586 Py_DECREF(s);
3587 return;
3588 }
Tim Peters111f6092001-09-12 07:54:51 +00003589 /* Ensure that only true string objects appear in the intern dict,
3590 and as the value of ob_sinterned. */
3591 if (PyString_CheckExact(s)) {
3592 t = (PyObject *)s;
3593 if (PyDict_SetItem(interned, t, t) == 0) {
3594 s->ob_sinterned = t;
3595 return;
3596 }
3597 }
3598 else {
3599 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3600 PyString_GET_SIZE(s));
3601 if (t != NULL) {
3602 if (PyDict_SetItem(interned, t, t) == 0) {
3603 *p = s->ob_sinterned = t;
3604 Py_DECREF(s);
3605 return;
3606 }
3607 Py_DECREF(t);
3608 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003609 }
3610 PyErr_Clear();
3611}
3612
3613
3614PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003615PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003616{
3617 PyObject *s = PyString_FromString(cp);
3618 if (s == NULL)
3619 return NULL;
3620 PyString_InternInPlace(&s);
3621 return s;
3622}
3623
Guido van Rossum8cf04761997-08-02 02:57:45 +00003624void
Fred Drakeba096332000-07-09 07:04:36 +00003625PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003626{
3627 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003628 for (i = 0; i < UCHAR_MAX + 1; i++) {
3629 Py_XDECREF(characters[i]);
3630 characters[i] = NULL;
3631 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003632 Py_XDECREF(nullstring);
3633 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003634 if (interned) {
3635 int pos, changed;
3636 PyObject *key, *value;
3637 do {
3638 changed = 0;
3639 pos = 0;
3640 while (PyDict_Next(interned, &pos, &key, &value)) {
3641 if (key->ob_refcnt == 2 && key == value) {
3642 PyDict_DelItem(interned, key);
3643 changed = 1;
3644 }
3645 }
3646 } while (changed);
3647 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003648}
Barry Warsawa903ad982001-02-23 16:40:48 +00003649
Barry Warsawa903ad982001-02-23 16:40:48 +00003650void _Py_ReleaseInternedStrings(void)
3651{
3652 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003653 fprintf(stderr, "releasing interned strings\n");
3654 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003655 Py_DECREF(interned);
3656 interned = NULL;
3657 }
3658}