blob: 709c5f79864d3767305e46c38a634a7386b13938 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000017static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000018
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes the number of characters to allocate, not counting
   any null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes. For
   PyString_FromStringAndSize(), the string in the `str' parameter does not
   have to be null-terminated.  (Therefore it is safe to construct a substring
   by calling `PyString_FromStringAndSize(origstring, substrlen)'.)  If `str'
   is NULL then PyString_FromStringAndSize() will allocate `size+1' bytes
   (setting the last byte to the null terminating character) and you can fill in
   the data yourself.  If `str' is non-NULL then the resulting PyString object
   must be treated as immutable and you must not fill in nor alter the data
   yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra items"
   in a variable-size object, will contain the number of bytes allocated for
   string data, not counting the null terminating character.  It is therefore
   equal to the `size' parameter (for PyString_FromStringAndSize())
   or the length of the string in the `str' parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
159 count = vargs;
160#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000161 /* step 1: figure out how large a buffer we need */
162 for (f = format; *f; f++) {
163 if (*f == '%') {
164 const char* p = f;
165 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
166 ;
167
168 /* skip the 'l' in %ld, since it doesn't change the
169 width. although only %d is supported (see
170 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000171 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000172 if (*f == 'l' && *(f+1) == 'd')
173 ++f;
174
175 switch (*f) {
176 case 'c':
177 (void)va_arg(count, int);
178 /* fall through... */
179 case '%':
180 n++;
181 break;
182 case 'd': case 'i': case 'x':
183 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000184 /* 20 bytes is enough to hold a 64-bit
185 integer. Decimal takes the most space.
186 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 n += 20;
188 break;
189 case 's':
190 s = va_arg(count, char*);
191 n += strlen(s);
192 break;
193 case 'p':
194 (void) va_arg(count, int);
195 /* maximum 64-bit pointer representation:
196 * 0xffffffffffffffff
197 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000198 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000199 */
200 n += 19;
201 break;
202 default:
203 /* if we stumble upon an unknown
204 formatting code, copy the rest of
205 the format string to the output
206 string. (we cannot just skip the
207 code, since there's no way to know
208 what's in the argument list) */
209 n += strlen(p);
210 goto expand;
211 }
212 } else
213 n++;
214 }
215 expand:
216 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000217 /* Since we've analyzed how much space we need for the worst case,
218 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 string = PyString_FromStringAndSize(NULL, n);
220 if (!string)
221 return NULL;
222
223 s = PyString_AsString(string);
224
225 for (f = format; *f; f++) {
226 if (*f == '%') {
227 const char* p = f++;
228 int i, longflag = 0;
229 /* parse the width.precision part (we're only
230 interested in the precision value, if any) */
231 n = 0;
232 while (isdigit(Py_CHARMASK(*f)))
233 n = (n*10) + *f++ - '0';
234 if (*f == '.') {
235 f++;
236 n = 0;
237 while (isdigit(Py_CHARMASK(*f)))
238 n = (n*10) + *f++ - '0';
239 }
240 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
241 f++;
242 /* handle the long flag, but only for %ld. others
243 can be added when necessary. */
244 if (*f == 'l' && *(f+1) == 'd') {
245 longflag = 1;
246 ++f;
247 }
248
249 switch (*f) {
250 case 'c':
251 *s++ = va_arg(vargs, int);
252 break;
253 case 'd':
254 if (longflag)
255 sprintf(s, "%ld", va_arg(vargs, long));
256 else
257 sprintf(s, "%d", va_arg(vargs, int));
258 s += strlen(s);
259 break;
260 case 'i':
261 sprintf(s, "%i", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'x':
265 sprintf(s, "%x", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 's':
269 p = va_arg(vargs, char*);
270 i = strlen(p);
271 if (n > 0 && i > n)
272 i = n;
273 memcpy(s, p, i);
274 s += i;
275 break;
276 case 'p':
277 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000278 /* %p is ill-defined: ensure leading 0x. */
279 if (s[1] == 'X')
280 s[1] = 'x';
281 else if (s[1] != 'x') {
282 memmove(s+2, s, strlen(s)+1);
283 s[0] = '0';
284 s[1] = 'x';
285 }
Barry Warsawdadace02001-08-24 18:32:06 +0000286 s += strlen(s);
287 break;
288 case '%':
289 *s++ = '%';
290 break;
291 default:
292 strcpy(s, p);
293 s += strlen(s);
294 goto end;
295 }
296 } else
297 *s++ = *f;
298 }
299
300 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000301 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000302 return string;
303}
304
305PyObject *
306PyString_FromFormat(const char *format, ...)
307{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000308 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000309 va_list vargs;
310
311#ifdef HAVE_STDARG_PROTOTYPES
312 va_start(vargs, format);
313#else
314 va_start(vargs);
315#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 ret = PyString_FromFormatV(format, vargs);
317 va_end(vargs);
318 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319}
320
321
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000322PyObject *PyString_Decode(const char *s,
323 int size,
324 const char *encoding,
325 const char *errors)
326{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000327 PyObject *v, *str;
328
329 str = PyString_FromStringAndSize(s, size);
330 if (str == NULL)
331 return NULL;
332 v = PyString_AsDecodedString(str, encoding, errors);
333 Py_DECREF(str);
334 return v;
335}
336
337PyObject *PyString_AsDecodedObject(PyObject *str,
338 const char *encoding,
339 const char *errors)
340{
341 PyObject *v;
342
343 if (!PyString_Check(str)) {
344 PyErr_BadArgument();
345 goto onError;
346 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000347
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000348 if (encoding == NULL) {
349#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000351#else
352 PyErr_SetString(PyExc_ValueError, "no encoding specified");
353 goto onError;
354#endif
355 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356
357 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000358 v = PyCodec_Decode(str, encoding, errors);
359 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361
362 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000363
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 return NULL;
366}
367
368PyObject *PyString_AsDecodedString(PyObject *str,
369 const char *encoding,
370 const char *errors)
371{
372 PyObject *v;
373
374 v = PyString_AsDecodedObject(str, encoding, errors);
375 if (v == NULL)
376 goto onError;
377
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000378#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000379 /* Convert Unicode to a string using the default encoding */
380 if (PyUnicode_Check(v)) {
381 PyObject *temp = v;
382 v = PyUnicode_AsEncodedString(v, NULL, NULL);
383 Py_DECREF(temp);
384 if (v == NULL)
385 goto onError;
386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 if (!PyString_Check(v)) {
389 PyErr_Format(PyExc_TypeError,
390 "decoder did not return a string object (type=%.400s)",
391 v->ob_type->tp_name);
392 Py_DECREF(v);
393 goto onError;
394 }
395
396 return v;
397
398 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 return NULL;
400}
401
402PyObject *PyString_Encode(const char *s,
403 int size,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 str = PyString_FromStringAndSize(s, size);
410 if (str == NULL)
411 return NULL;
412 v = PyString_AsEncodedString(str, encoding, errors);
413 Py_DECREF(str);
414 return v;
415}
416
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 const char *encoding,
419 const char *errors)
420{
421 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000422
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000423 if (!PyString_Check(str)) {
424 PyErr_BadArgument();
425 goto onError;
426 }
427
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000428 if (encoding == NULL) {
429#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000431#else
432 PyErr_SetString(PyExc_ValueError, "no encoding specified");
433 goto onError;
434#endif
435 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436
437 /* Encode via the codec registry */
438 v = PyCodec_Encode(str, encoding, errors);
439 if (v == NULL)
440 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000441
442 return v;
443
444 onError:
445 return NULL;
446}
447
448PyObject *PyString_AsEncodedString(PyObject *str,
449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v;
453
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000454 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000455 if (v == NULL)
456 goto onError;
457
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000458#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 /* Convert Unicode to a string using the default encoding */
460 if (PyUnicode_Check(v)) {
461 PyObject *temp = v;
462 v = PyUnicode_AsEncodedString(v, NULL, NULL);
463 Py_DECREF(temp);
464 if (v == NULL)
465 goto onError;
466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(v)) {
469 PyErr_Format(PyExc_TypeError,
470 "encoder did not return a string object (type=%.400s)",
471 v->ob_type->tp_name);
472 Py_DECREF(v);
473 goto onError;
474 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000476 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000477
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 onError:
479 return NULL;
480}
481
Guido van Rossum234f9421993-06-17 12:35:49 +0000482static void
Fred Drakeba096332000-07-09 07:04:36 +0000483string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000484{
Guido van Rossum9475a232001-10-05 20:51:39 +0000485 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000486}
487
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000488static int
489string_getsize(register PyObject *op)
490{
491 char *s;
492 int len;
493 if (PyString_AsStringAndSize(op, &s, &len))
494 return -1;
495 return len;
496}
497
498static /*const*/ char *
499string_getbuffer(register PyObject *op)
500{
501 char *s;
502 int len;
503 if (PyString_AsStringAndSize(op, &s, &len))
504 return NULL;
505 return s;
506}
507
Guido van Rossumd7047b31995-01-02 19:07:15 +0000508int
Fred Drakeba096332000-07-09 07:04:36 +0000509PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000511 if (!PyString_Check(op))
512 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
516/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000517PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (!PyString_Check(op))
520 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522}
523
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000524int
525PyString_AsStringAndSize(register PyObject *obj,
526 register char **s,
527 register int *len)
528{
529 if (s == NULL) {
530 PyErr_BadInternalCall();
531 return -1;
532 }
533
534 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000536 if (PyUnicode_Check(obj)) {
537 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
538 if (obj == NULL)
539 return -1;
540 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000541 else
542#endif
543 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000544 PyErr_Format(PyExc_TypeError,
545 "expected string or Unicode object, "
546 "%.200s found", obj->ob_type->tp_name);
547 return -1;
548 }
549 }
550
551 *s = PyString_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyString_GET_SIZE(obj);
554 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected string without null bytes");
557 return -1;
558 }
559 return 0;
560}
561
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562/* Methods */
563
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564static int
Fred Drakeba096332000-07-09 07:04:36 +0000565string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566{
567 int i;
568 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000570
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000571 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000572 if (! PyString_CheckExact(op)) {
573 int ret;
574 /* A str subclass may have its own __str__ method. */
575 op = (PyStringObject *) PyObject_Str((PyObject *)op);
576 if (op == NULL)
577 return -1;
578 ret = string_print(op, fp, flags);
579 Py_DECREF(op);
580 return ret;
581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000582 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000584 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000586
Thomas Wouters7e474022000-07-16 12:04:32 +0000587 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 quote = '\'';
Martin v. Löwis1f803f72002-01-16 10:53:24 +0000589 if (memchr(op->ob_sval, '\'', op->ob_size) && !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000590 quote = '"';
591
592 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593 for (i = 0; i < op->ob_size; i++) {
594 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000595 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000596 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000597 else if (c == '\t')
598 fprintf(fp, "\\t");
599 else if (c == '\n')
600 fprintf(fp, "\\n");
601 else if (c == '\r')
602 fprintf(fp, "\\r");
603 else if (c < ' ' || c >= 0x7f)
604 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000606 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000608 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000609 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000610}
611
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000612static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000613string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000614{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000615 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
616 PyObject *v;
617 if (newsize > INT_MAX) {
618 PyErr_SetString(PyExc_OverflowError,
619 "string is too large to make repr");
620 }
621 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000622 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000623 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 }
625 else {
626 register int i;
627 register char c;
628 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000629 int quote;
630
Thomas Wouters7e474022000-07-16 12:04:32 +0000631 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000632 quote = '\'';
Martin v. Löwis1f803f72002-01-16 10:53:24 +0000633 if (memchr(op->ob_sval, '\'', op->ob_size) && !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000634 quote = '"';
635
Tim Peters9161c8b2001-12-03 01:55:38 +0000636 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000637 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000638 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000639 /* There's at least enough room for a hex escape
640 and a closing quote. */
641 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000642 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000643 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000645 else if (c == '\t')
646 *p++ = '\\', *p++ = 't';
647 else if (c == '\n')
648 *p++ = '\\', *p++ = 'n';
649 else if (c == '\r')
650 *p++ = '\\', *p++ = 'r';
651 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000652 /* For performance, we don't want to call
653 PyOS_snprintf here (extra layers of
654 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000655 sprintf(p, "\\x%02x", c & 0xff);
656 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000657 }
658 else
659 *p++ = c;
660 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000661 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000662 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000663 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000664 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000665 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000666 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000667 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000668}
669
Guido van Rossum189f1df2001-05-01 16:51:53 +0000670static PyObject *
671string_str(PyObject *s)
672{
Tim Petersc9933152001-10-16 20:18:24 +0000673 assert(PyString_Check(s));
674 if (PyString_CheckExact(s)) {
675 Py_INCREF(s);
676 return s;
677 }
678 else {
679 /* Subtype -- return genuine string with the same value. */
680 PyStringObject *t = (PyStringObject *) s;
681 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
682 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000683}
684
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000685static int
Fred Drakeba096332000-07-09 07:04:36 +0000686string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687{
688 return a->ob_size;
689}
690
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000691static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000692string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000693{
694 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000695 register PyStringObject *op;
696 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000697#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000698 if (PyUnicode_Check(bb))
699 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000700#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000701 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000702 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000703 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704 return NULL;
705 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000708 if ((a->ob_size == 0 || b->ob_size == 0) &&
709 PyString_CheckExact(a) && PyString_CheckExact(b)) {
710 if (a->ob_size == 0) {
711 Py_INCREF(bb);
712 return bb;
713 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000714 Py_INCREF(a);
715 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716 }
717 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000718 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000719 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000720 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000721 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000722 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000723 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000724 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000725 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000726 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
727 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
728 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000729 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000730#undef b
731}
732
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000733static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000734string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000735{
736 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000737 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000738 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000739 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000740 if (n < 0)
741 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000742 /* watch out for overflows: the size can overflow int,
743 * and the # of bytes needed can overflow size_t
744 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000745 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000746 if (n && size / n != a->ob_size) {
747 PyErr_SetString(PyExc_OverflowError,
748 "repeated string is too long");
749 return NULL;
750 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000751 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752 Py_INCREF(a);
753 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000754 }
Tim Peters8f422462000-09-09 06:13:41 +0000755 nbytes = size * sizeof(char);
756 if (nbytes / sizeof(char) != (size_t)size ||
757 nbytes + sizeof(PyStringObject) <= nbytes) {
758 PyErr_SetString(PyExc_OverflowError,
759 "repeated string is too long");
760 return NULL;
761 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000762 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000763 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000764 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000765 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000766 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000767 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000768 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000769 for (i = 0; i < size; i += a->ob_size)
770 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
771 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000772 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000773}
774
775/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
776
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000777static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000778string_slice(register PyStringObject *a, register int i, register int j)
779 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000780{
781 if (i < 0)
782 i = 0;
783 if (j < 0)
784 j = 0; /* Avoid signed/unsigned bug in next line */
785 if (j > a->ob_size)
786 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000787 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
788 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000789 Py_INCREF(a);
790 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
792 if (j < i)
793 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000794 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000795}
796
Guido van Rossum9284a572000-03-07 15:53:43 +0000797static int
Fred Drakeba096332000-07-09 07:04:36 +0000798string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000799{
800 register char *s, *end;
801 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000802#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000803 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000804 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000805#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000806 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000807 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000808 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000809 return -1;
810 }
811 c = PyString_AsString(el)[0];
812 s = PyString_AsString(a);
813 end = s + PyString_Size(a);
814 while (s < end) {
815 if (c == *s++)
816 return 1;
817 }
818 return 0;
819}
820
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000821static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000822string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000824 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000825 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000827 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 return NULL;
829 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000830 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000831 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000832 if (v == NULL)
833 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000834 else {
835#ifdef COUNT_ALLOCS
836 one_strings++;
837#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000838 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000839 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000840 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841}
842
Martin v. Löwiscd353062001-05-24 16:56:35 +0000843static PyObject*
844string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000846 int c;
847 int len_a, len_b;
848 int min_len;
849 PyObject *result;
850
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000851 /* Make sure both arguments are strings. */
852 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000853 result = Py_NotImplemented;
854 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000855 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000856 if (a == b) {
857 switch (op) {
858 case Py_EQ:case Py_LE:case Py_GE:
859 result = Py_True;
860 goto out;
861 case Py_NE:case Py_LT:case Py_GT:
862 result = Py_False;
863 goto out;
864 }
865 }
866 if (op == Py_EQ) {
867 /* Supporting Py_NE here as well does not save
868 much time, since Py_NE is rarely used. */
869 if (a->ob_size == b->ob_size
870 && (a->ob_sval[0] == b->ob_sval[0]
871 && memcmp(a->ob_sval, b->ob_sval,
872 a->ob_size) == 0)) {
873 result = Py_True;
874 } else {
875 result = Py_False;
876 }
877 goto out;
878 }
879 len_a = a->ob_size; len_b = b->ob_size;
880 min_len = (len_a < len_b) ? len_a : len_b;
881 if (min_len > 0) {
882 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
883 if (c==0)
884 c = memcmp(a->ob_sval, b->ob_sval, min_len);
885 }else
886 c = 0;
887 if (c == 0)
888 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
889 switch (op) {
890 case Py_LT: c = c < 0; break;
891 case Py_LE: c = c <= 0; break;
892 case Py_EQ: assert(0); break; /* unreachable */
893 case Py_NE: c = c != 0; break;
894 case Py_GT: c = c > 0; break;
895 case Py_GE: c = c >= 0; break;
896 default:
897 result = Py_NotImplemented;
898 goto out;
899 }
900 result = c ? Py_True : Py_False;
901 out:
902 Py_INCREF(result);
903 return result;
904}
905
906int
907_PyString_Eq(PyObject *o1, PyObject *o2)
908{
909 PyStringObject *a, *b;
910 a = (PyStringObject*)o1;
911 b = (PyStringObject*)o2;
912 return a->ob_size == b->ob_size
913 && *a->ob_sval == *b->ob_sval
914 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915}
916
Guido van Rossum9bfef441993-03-29 10:43:31 +0000917static long
Fred Drakeba096332000-07-09 07:04:36 +0000918string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000920 register int len;
921 register unsigned char *p;
922 register long x;
923
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000924 if (a->ob_shash != -1)
925 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000926 if (a->ob_sinterned != NULL)
927 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000929 len = a->ob_size;
930 p = (unsigned char *) a->ob_sval;
931 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000932 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000933 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 x ^= a->ob_size;
935 if (x == -1)
936 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000937 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000938 return x;
939}
940
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000941static int
Fred Drakeba096332000-07-09 07:04:36 +0000942string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000943{
944 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000945 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000946 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000947 return -1;
948 }
949 *ptr = (void *)self->ob_sval;
950 return self->ob_size;
951}
952
953static int
Fred Drakeba096332000-07-09 07:04:36 +0000954string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000955{
Guido van Rossum045e6881997-09-08 18:30:11 +0000956 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000957 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000958 return -1;
959}
960
961static int
Fred Drakeba096332000-07-09 07:04:36 +0000962string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000963{
964 if ( lenp )
965 *lenp = self->ob_size;
966 return 1;
967}
968
Guido van Rossum1db70701998-10-08 02:18:52 +0000969static int
Fred Drakeba096332000-07-09 07:04:36 +0000970string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000971{
972 if ( index != 0 ) {
973 PyErr_SetString(PyExc_SystemError,
974 "accessing non-existent string segment");
975 return -1;
976 }
977 *ptr = self->ob_sval;
978 return self->ob_size;
979}
980
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000982 (inquiry)string_length, /*sq_length*/
983 (binaryfunc)string_concat, /*sq_concat*/
984 (intargfunc)string_repeat, /*sq_repeat*/
985 (intargfunc)string_item, /*sq_item*/
986 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000987 0, /*sq_ass_item*/
988 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000989 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000990};
991
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000992static PyBufferProcs string_as_buffer = {
993 (getreadbufferproc)string_buffer_getreadbuf,
994 (getwritebufferproc)string_buffer_getwritebuf,
995 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000996 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000997};
998
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000999
1000
/* Strip modes understood by do_strip(), do_xstrip() and do_argstrip(). */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Method names for error messages, indexed by the strip modes above. */
static const char *stripname[] = {
	"lstrip",	/* LEFTSTRIP */
	"rstrip",	/* RIGHTSTRIP */
	"strip"		/* BOTHSTRIP */
};
1007
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001008
1009static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001010split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001011{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001012 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001013 PyObject* item;
1014 PyObject *list = PyList_New(0);
1015
1016 if (list == NULL)
1017 return NULL;
1018
Guido van Rossum4c08d552000-03-10 22:55:18 +00001019 for (i = j = 0; i < len; ) {
1020 while (i < len && isspace(Py_CHARMASK(s[i])))
1021 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001022 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001023 while (i < len && !isspace(Py_CHARMASK(s[i])))
1024 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001025 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001026 if (maxsplit-- <= 0)
1027 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1029 if (item == NULL)
1030 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001031 err = PyList_Append(list, item);
1032 Py_DECREF(item);
1033 if (err < 0)
1034 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001035 while (i < len && isspace(Py_CHARMASK(s[i])))
1036 i++;
1037 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001038 }
1039 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001040 if (j < len) {
1041 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1042 if (item == NULL)
1043 goto finally;
1044 err = PyList_Append(list, item);
1045 Py_DECREF(item);
1046 if (err < 0)
1047 goto finally;
1048 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001049 return list;
1050 finally:
1051 Py_DECREF(list);
1052 return NULL;
1053}
1054
1055
1056static char split__doc__[] =
1057"S.split([sep [,maxsplit]]) -> list of strings\n\
1058\n\
1059Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001060delimiter string. If maxsplit is given, at most maxsplit\n\
1061splits are done. If sep is not specified, any whitespace string\n\
1062is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001063
1064static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001065string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001066{
1067 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001068 int maxsplit = -1;
1069 const char *s = PyString_AS_STRING(self), *sub;
1070 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001071
Guido van Rossum4c08d552000-03-10 22:55:18 +00001072 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001073 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001074 if (maxsplit < 0)
1075 maxsplit = INT_MAX;
1076 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001077 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001078 if (PyString_Check(subobj)) {
1079 sub = PyString_AS_STRING(subobj);
1080 n = PyString_GET_SIZE(subobj);
1081 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001082#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001083 else if (PyUnicode_Check(subobj))
1084 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001085#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001086 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1087 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088 if (n == 0) {
1089 PyErr_SetString(PyExc_ValueError, "empty separator");
1090 return NULL;
1091 }
1092
1093 list = PyList_New(0);
1094 if (list == NULL)
1095 return NULL;
1096
1097 i = j = 0;
1098 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001099 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001100 if (maxsplit-- <= 0)
1101 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001102 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1103 if (item == NULL)
1104 goto fail;
1105 err = PyList_Append(list, item);
1106 Py_DECREF(item);
1107 if (err < 0)
1108 goto fail;
1109 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001110 }
1111 else
1112 i++;
1113 }
1114 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1115 if (item == NULL)
1116 goto fail;
1117 err = PyList_Append(list, item);
1118 Py_DECREF(item);
1119 if (err < 0)
1120 goto fail;
1121
1122 return list;
1123
1124 fail:
1125 Py_DECREF(list);
1126 return NULL;
1127}
1128
1129
1130static char join__doc__[] =
1131"S.join(sequence) -> string\n\
1132\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001133Return a string which is the concatenation of the strings in the\n\
1134sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001135
1136static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001137string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001138{
1139 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001140 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001142 char *p;
1143 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001144 size_t sz = 0;
1145 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001146 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001147
Tim Peters19fe14e2001-01-19 03:03:47 +00001148 seq = PySequence_Fast(orig, "");
1149 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001150 if (PyErr_ExceptionMatches(PyExc_TypeError))
1151 PyErr_Format(PyExc_TypeError,
1152 "sequence expected, %.80s found",
1153 orig->ob_type->tp_name);
1154 return NULL;
1155 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001156
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001157 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001158 if (seqlen == 0) {
1159 Py_DECREF(seq);
1160 return PyString_FromString("");
1161 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001162 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001163 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001164 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1165 PyErr_Format(PyExc_TypeError,
1166 "sequence item 0: expected string,"
1167 " %.80s found",
1168 item->ob_type->tp_name);
1169 Py_DECREF(seq);
1170 return NULL;
1171 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001172 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001173 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001174 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001175 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001176
Tim Peters19fe14e2001-01-19 03:03:47 +00001177 /* There are at least two things to join. Do a pre-pass to figure out
1178 * the total amount of space we'll need (sz), see whether any argument
1179 * is absurd, and defer to the Unicode join if appropriate.
1180 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001181 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001182 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001183 item = PySequence_Fast_GET_ITEM(seq, i);
1184 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001185#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001186 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001187 /* Defer to Unicode join.
1188 * CAUTION: There's no gurantee that the
1189 * original sequence can be iterated over
1190 * again, so we must pass seq here.
1191 */
1192 PyObject *result;
1193 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001194 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001195 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001196 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001197#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001198 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001199 "sequence item %i: expected string,"
1200 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001201 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001202 Py_DECREF(seq);
1203 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001204 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001205 sz += PyString_GET_SIZE(item);
1206 if (i != 0)
1207 sz += seplen;
1208 if (sz < old_sz || sz > INT_MAX) {
1209 PyErr_SetString(PyExc_OverflowError,
1210 "join() is too long for a Python string");
1211 Py_DECREF(seq);
1212 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001214 }
1215
1216 /* Allocate result space. */
1217 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1218 if (res == NULL) {
1219 Py_DECREF(seq);
1220 return NULL;
1221 }
1222
1223 /* Catenate everything. */
1224 p = PyString_AS_STRING(res);
1225 for (i = 0; i < seqlen; ++i) {
1226 size_t n;
1227 item = PySequence_Fast_GET_ITEM(seq, i);
1228 n = PyString_GET_SIZE(item);
1229 memcpy(p, PyString_AS_STRING(item), n);
1230 p += n;
1231 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001232 memcpy(p, sep, seplen);
1233 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001234 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001235 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001236
Jeremy Hylton49048292000-07-11 03:28:17 +00001237 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001238 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001239}
1240
Tim Peters52e155e2001-06-16 05:42:57 +00001241PyObject *
1242_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001243{
Tim Petersa7259592001-06-16 05:11:17 +00001244 assert(sep != NULL && PyString_Check(sep));
1245 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001246 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001247}
1248
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001249static long
Fred Drakeba096332000-07-09 07:04:36 +00001250string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001251{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001252 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001253 int len = PyString_GET_SIZE(self);
1254 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001255 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001256
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001257 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001258 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001259 return -2;
1260 if (PyString_Check(subobj)) {
1261 sub = PyString_AS_STRING(subobj);
1262 n = PyString_GET_SIZE(subobj);
1263 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001264#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001265 else if (PyUnicode_Check(subobj))
1266 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001267#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001268 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001269 return -2;
1270
1271 if (last > len)
1272 last = len;
1273 if (last < 0)
1274 last += len;
1275 if (last < 0)
1276 last = 0;
1277 if (i < 0)
1278 i += len;
1279 if (i < 0)
1280 i = 0;
1281
Guido van Rossum4c08d552000-03-10 22:55:18 +00001282 if (dir > 0) {
1283 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001284 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001285 last -= n;
1286 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001287 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001288 return (long)i;
1289 }
1290 else {
1291 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001292
Guido van Rossum4c08d552000-03-10 22:55:18 +00001293 if (n == 0 && i <= last)
1294 return (long)last;
1295 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001296 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001297 return (long)j;
1298 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001299
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001300 return -1;
1301}
1302
1303
1304static char find__doc__[] =
1305"S.find(sub [,start [,end]]) -> int\n\
1306\n\
1307Return the lowest index in S where substring sub is found,\n\
1308such that sub is contained within s[start,end]. Optional\n\
1309arguments start and end are interpreted as in slice notation.\n\
1310\n\
1311Return -1 on failure.";
1312
1313static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001314string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317 if (result == -2)
1318 return NULL;
1319 return PyInt_FromLong(result);
1320}
1321
1322
1323static char index__doc__[] =
1324"S.index(sub [,start [,end]]) -> int\n\
1325\n\
1326Like S.find() but raise ValueError when the substring is not found.";
1327
1328static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001329string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001331 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332 if (result == -2)
1333 return NULL;
1334 if (result == -1) {
1335 PyErr_SetString(PyExc_ValueError,
1336 "substring not found in string.index");
1337 return NULL;
1338 }
1339 return PyInt_FromLong(result);
1340}
1341
1342
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001343static char rfind__doc__[] =
1344"S.rfind(sub [,start [,end]]) -> int\n\
1345\n\
1346Return the highest index in S where substring sub is found,\n\
1347such that sub is contained within s[start,end]. Optional\n\
1348arguments start and end are interpreted as in slice notation.\n\
1349\n\
1350Return -1 on failure.";
1351
1352static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001353string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001356 if (result == -2)
1357 return NULL;
1358 return PyInt_FromLong(result);
1359}
1360
1361
1362static char rindex__doc__[] =
1363"S.rindex(sub [,start [,end]]) -> int\n\
1364\n\
1365Like S.rfind() but raise ValueError when the substring is not found.";
1366
1367static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001368string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001370 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371 if (result == -2)
1372 return NULL;
1373 if (result == -1) {
1374 PyErr_SetString(PyExc_ValueError,
1375 "substring not found in string.rindex");
1376 return NULL;
1377 }
1378 return PyInt_FromLong(result);
1379}
1380
1381
1382static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001383do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1384{
1385 char *s = PyString_AS_STRING(self);
1386 int len = PyString_GET_SIZE(self);
1387 char *sep = PyString_AS_STRING(sepobj);
1388 int seplen = PyString_GET_SIZE(sepobj);
1389 int i, j;
1390
1391 i = 0;
1392 if (striptype != RIGHTSTRIP) {
1393 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1394 i++;
1395 }
1396 }
1397
1398 j = len;
1399 if (striptype != LEFTSTRIP) {
1400 do {
1401 j--;
1402 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1403 j++;
1404 }
1405
1406 if (i == 0 && j == len && PyString_CheckExact(self)) {
1407 Py_INCREF(self);
1408 return (PyObject*)self;
1409 }
1410 else
1411 return PyString_FromStringAndSize(s+i, j-i);
1412}
1413
1414
1415static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001416do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417{
1418 char *s = PyString_AS_STRING(self);
1419 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421 i = 0;
1422 if (striptype != RIGHTSTRIP) {
1423 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1424 i++;
1425 }
1426 }
1427
1428 j = len;
1429 if (striptype != LEFTSTRIP) {
1430 do {
1431 j--;
1432 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1433 j++;
1434 }
1435
Tim Peters8fa5dd02001-09-12 02:18:30 +00001436 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437 Py_INCREF(self);
1438 return (PyObject*)self;
1439 }
1440 else
1441 return PyString_FromStringAndSize(s+i, j-i);
1442}
1443
1444
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001445static PyObject *
1446do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1447{
1448 PyObject *sep = NULL;
1449
1450 if (!PyArg_ParseTuple(args, "|O:[lr]strip", &sep))
1451 return NULL;
1452
1453 if (sep != NULL && sep != Py_None) {
1454 /* XXX What about Unicode? */
1455 if (!PyString_Check(sep)) {
1456 PyErr_Format(PyExc_TypeError,
1457 "%s arg must be None or string",
1458 stripname[striptype]);
1459 return NULL;
1460 }
1461 return do_xstrip(self, striptype, sep);
1462 }
1463
1464 return do_strip(self, striptype);
1465}
1466
1467
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468static char strip__doc__[] =
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001469"S.strip([sep]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470\n\
1471Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001472whitespace removed.\n\
1473If sep is given and not None, remove characters in sep instead.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474
1475static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001476string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001477{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001478 if (PyTuple_GET_SIZE(args) == 0)
1479 return do_strip(self, BOTHSTRIP); /* Common case */
1480 else
1481 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001482}
1483
1484
1485static char lstrip__doc__[] =
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001486"S.lstrip([sep]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001488Return a copy of the string S with leading whitespace removed.\n\
1489If sep is given and not None, remove characters in sep instead.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490
1491static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001492string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001494 if (PyTuple_GET_SIZE(args) == 0)
1495 return do_strip(self, LEFTSTRIP); /* Common case */
1496 else
1497 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498}
1499
1500
1501static char rstrip__doc__[] =
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001502"S.rstrip([sep]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001504Return a copy of the string S with trailing whitespace removed.\n\
1505If sep is given and not None, remove characters in sep instead.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506
1507static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001508string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001510 if (PyTuple_GET_SIZE(args) == 0)
1511 return do_strip(self, RIGHTSTRIP); /* Common case */
1512 else
1513 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514}
1515
1516
1517static char lower__doc__[] =
1518"S.lower() -> string\n\
1519\n\
1520Return a copy of the string S converted to lowercase.";
1521
1522static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001523string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524{
1525 char *s = PyString_AS_STRING(self), *s_new;
1526 int i, n = PyString_GET_SIZE(self);
1527 PyObject *new;
1528
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001529 new = PyString_FromStringAndSize(NULL, n);
1530 if (new == NULL)
1531 return NULL;
1532 s_new = PyString_AsString(new);
1533 for (i = 0; i < n; i++) {
1534 int c = Py_CHARMASK(*s++);
1535 if (isupper(c)) {
1536 *s_new = tolower(c);
1537 } else
1538 *s_new = c;
1539 s_new++;
1540 }
1541 return new;
1542}
1543
1544
1545static char upper__doc__[] =
1546"S.upper() -> string\n\
1547\n\
1548Return a copy of the string S converted to uppercase.";
1549
1550static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001551string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001552{
1553 char *s = PyString_AS_STRING(self), *s_new;
1554 int i, n = PyString_GET_SIZE(self);
1555 PyObject *new;
1556
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001557 new = PyString_FromStringAndSize(NULL, n);
1558 if (new == NULL)
1559 return NULL;
1560 s_new = PyString_AsString(new);
1561 for (i = 0; i < n; i++) {
1562 int c = Py_CHARMASK(*s++);
1563 if (islower(c)) {
1564 *s_new = toupper(c);
1565 } else
1566 *s_new = c;
1567 s_new++;
1568 }
1569 return new;
1570}
1571
1572
Guido van Rossum4c08d552000-03-10 22:55:18 +00001573static char title__doc__[] =
1574"S.title() -> string\n\
1575\n\
1576Return a titlecased version of S, i.e. words start with uppercase\n\
1577characters, all remaining cased characters have lowercase.";
1578
1579static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001580string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001581{
1582 char *s = PyString_AS_STRING(self), *s_new;
1583 int i, n = PyString_GET_SIZE(self);
1584 int previous_is_cased = 0;
1585 PyObject *new;
1586
Guido van Rossum4c08d552000-03-10 22:55:18 +00001587 new = PyString_FromStringAndSize(NULL, n);
1588 if (new == NULL)
1589 return NULL;
1590 s_new = PyString_AsString(new);
1591 for (i = 0; i < n; i++) {
1592 int c = Py_CHARMASK(*s++);
1593 if (islower(c)) {
1594 if (!previous_is_cased)
1595 c = toupper(c);
1596 previous_is_cased = 1;
1597 } else if (isupper(c)) {
1598 if (previous_is_cased)
1599 c = tolower(c);
1600 previous_is_cased = 1;
1601 } else
1602 previous_is_cased = 0;
1603 *s_new++ = c;
1604 }
1605 return new;
1606}
1607
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608static char capitalize__doc__[] =
1609"S.capitalize() -> string\n\
1610\n\
1611Return a copy of the string S with only its first character\n\
1612capitalized.";
1613
1614static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001615string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001616{
1617 char *s = PyString_AS_STRING(self), *s_new;
1618 int i, n = PyString_GET_SIZE(self);
1619 PyObject *new;
1620
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621 new = PyString_FromStringAndSize(NULL, n);
1622 if (new == NULL)
1623 return NULL;
1624 s_new = PyString_AsString(new);
1625 if (0 < n) {
1626 int c = Py_CHARMASK(*s++);
1627 if (islower(c))
1628 *s_new = toupper(c);
1629 else
1630 *s_new = c;
1631 s_new++;
1632 }
1633 for (i = 1; i < n; i++) {
1634 int c = Py_CHARMASK(*s++);
1635 if (isupper(c))
1636 *s_new = tolower(c);
1637 else
1638 *s_new = c;
1639 s_new++;
1640 }
1641 return new;
1642}
1643
1644
1645static char count__doc__[] =
1646"S.count(sub[, start[, end]]) -> int\n\
1647\n\
1648Return the number of occurrences of substring sub in string\n\
1649S[start:end]. Optional arguments start and end are\n\
1650interpreted as in slice notation.";
1651
1652static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001653string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001655 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656 int len = PyString_GET_SIZE(self), n;
1657 int i = 0, last = INT_MAX;
1658 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001659 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001660
Guido van Rossumc6821402000-05-08 14:08:05 +00001661 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1662 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001664
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665 if (PyString_Check(subobj)) {
1666 sub = PyString_AS_STRING(subobj);
1667 n = PyString_GET_SIZE(subobj);
1668 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001669#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001670 else if (PyUnicode_Check(subobj)) {
1671 int count;
1672 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1673 if (count == -1)
1674 return NULL;
1675 else
1676 return PyInt_FromLong((long) count);
1677 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001678#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1680 return NULL;
1681
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001682 if (last > len)
1683 last = len;
1684 if (last < 0)
1685 last += len;
1686 if (last < 0)
1687 last = 0;
1688 if (i < 0)
1689 i += len;
1690 if (i < 0)
1691 i = 0;
1692 m = last + 1 - n;
1693 if (n == 0)
1694 return PyInt_FromLong((long) (m-i));
1695
1696 r = 0;
1697 while (i < m) {
1698 if (!memcmp(s+i, sub, n)) {
1699 r++;
1700 i += n;
1701 } else {
1702 i++;
1703 }
1704 }
1705 return PyInt_FromLong((long) r);
1706}
1707
1708
1709static char swapcase__doc__[] =
1710"S.swapcase() -> string\n\
1711\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001712Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001713converted to lowercase and vice versa.";
1714
1715static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001716string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717{
1718 char *s = PyString_AS_STRING(self), *s_new;
1719 int i, n = PyString_GET_SIZE(self);
1720 PyObject *new;
1721
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001722 new = PyString_FromStringAndSize(NULL, n);
1723 if (new == NULL)
1724 return NULL;
1725 s_new = PyString_AsString(new);
1726 for (i = 0; i < n; i++) {
1727 int c = Py_CHARMASK(*s++);
1728 if (islower(c)) {
1729 *s_new = toupper(c);
1730 }
1731 else if (isupper(c)) {
1732 *s_new = tolower(c);
1733 }
1734 else
1735 *s_new = c;
1736 s_new++;
1737 }
1738 return new;
1739}
1740
1741
1742static char translate__doc__[] =
1743"S.translate(table [,deletechars]) -> string\n\
1744\n\
1745Return a copy of the string S, where all characters occurring\n\
1746in the optional argument deletechars are removed, and the\n\
1747remaining characters have been mapped through the given\n\
1748translation table, which must be a string of length 256.";
1749
1750static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001751string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753 register char *input, *output;
1754 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755 register int i, c, changed = 0;
1756 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001757 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 int inlen, tablen, dellen = 0;
1759 PyObject *result;
1760 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001761 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762
Guido van Rossum4c08d552000-03-10 22:55:18 +00001763 if (!PyArg_ParseTuple(args, "O|O:translate",
1764 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001766
1767 if (PyString_Check(tableobj)) {
1768 table1 = PyString_AS_STRING(tableobj);
1769 tablen = PyString_GET_SIZE(tableobj);
1770 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001771#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001772 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001773 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001774 parameter; instead a mapping to None will cause characters
1775 to be deleted. */
1776 if (delobj != NULL) {
1777 PyErr_SetString(PyExc_TypeError,
1778 "deletions are implemented differently for unicode");
1779 return NULL;
1780 }
1781 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1782 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001783#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001784 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001785 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001786
1787 if (delobj != NULL) {
1788 if (PyString_Check(delobj)) {
1789 del_table = PyString_AS_STRING(delobj);
1790 dellen = PyString_GET_SIZE(delobj);
1791 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001792#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001793 else if (PyUnicode_Check(delobj)) {
1794 PyErr_SetString(PyExc_TypeError,
1795 "deletions are implemented differently for unicode");
1796 return NULL;
1797 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001798#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001799 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1800 return NULL;
1801
1802 if (tablen != 256) {
1803 PyErr_SetString(PyExc_ValueError,
1804 "translation table must be 256 characters long");
1805 return NULL;
1806 }
1807 }
1808 else {
1809 del_table = NULL;
1810 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811 }
1812
1813 table = table1;
1814 inlen = PyString_Size(input_obj);
1815 result = PyString_FromStringAndSize((char *)NULL, inlen);
1816 if (result == NULL)
1817 return NULL;
1818 output_start = output = PyString_AsString(result);
1819 input = PyString_AsString(input_obj);
1820
1821 if (dellen == 0) {
1822 /* If no deletions are required, use faster code */
1823 for (i = inlen; --i >= 0; ) {
1824 c = Py_CHARMASK(*input++);
1825 if (Py_CHARMASK((*output++ = table[c])) != c)
1826 changed = 1;
1827 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001828 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829 return result;
1830 Py_DECREF(result);
1831 Py_INCREF(input_obj);
1832 return input_obj;
1833 }
1834
1835 for (i = 0; i < 256; i++)
1836 trans_table[i] = Py_CHARMASK(table[i]);
1837
1838 for (i = 0; i < dellen; i++)
1839 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1840
1841 for (i = inlen; --i >= 0; ) {
1842 c = Py_CHARMASK(*input++);
1843 if (trans_table[c] != -1)
1844 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1845 continue;
1846 changed = 1;
1847 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001848 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849 Py_DECREF(result);
1850 Py_INCREF(input_obj);
1851 return input_obj;
1852 }
1853 /* Fix the size of the resulting string */
1854 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1855 return NULL;
1856 return result;
1857}
1858
1859
1860/* What follows is used for implementing replace(). Perry Stoll. */
1861
/*
  mymemfind

  memmem-style search over raw bytes (embedded NULs are fine).

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT and returns the offset of that occurrence within MEM,
  or -1 when there is none.  A pattern longer than MEM never matches,
  so -1 is returned in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* a match cannot begin inside the final pat_len-1 bytes */
	const int last_start = len - pat_len;
	int pos;

	for (pos = 0; pos <= last_start; pos++) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] != pat[0])
			continue;
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
1887
/*
   mymemcnt

   Count the non-overlapping occurrences of PAT in MEM, scanning left
   to right and resuming immediately after each match.
   E.g. mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also
   gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;

	while (len >= 0) {
		int pos = mymemfind(mem, len, pat, pat_len);
		int consumed;

		if (pos == -1)
			break;
		/* drop everything up to and including this match */
		consumed = pos + pat_len;
		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
1911
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If STR is empty, if length of PAT is greater than length of STR, or
   if there are no occurrences of PAT in STR (or COUNT is 0), then the
   original string is returned.  Otherwise, a new string is allocated
   here and returned; the caller releases it with PyMem_FREE().

   A negative COUNT means "replace every occurrence"; otherwise at most
   COUNT occurrences are replaced.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would not
       fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* When the replacement is longer than the pattern the result
	   grows; refuse sizes that cannot be represented in an int
	   instead of letting the arithmetic below overflow.  (A
	   shrinking replacement cannot overflow: the matches are
	   non-overlapping, so nfound*pat_len <= len.) */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
1996
1997
1998static char replace__doc__[] =
1999"S.replace (old, new[, maxsplit]) -> string\n\
2000\n\
2001Return a copy of string S with all occurrences of substring\n\
2002old replaced by new. If the optional argument maxsplit is\n\
2003given, only the first maxsplit occurrences are replaced.";
2004
2005static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002006string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002008 const char *str = PyString_AS_STRING(self), *sub, *repl;
2009 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002010 const int len = PyString_GET_SIZE(self);
2011 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002012 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002014 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015
Guido van Rossum4c08d552000-03-10 22:55:18 +00002016 if (!PyArg_ParseTuple(args, "OO|i:replace",
2017 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002019
2020 if (PyString_Check(subobj)) {
2021 sub = PyString_AS_STRING(subobj);
2022 sub_len = PyString_GET_SIZE(subobj);
2023 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002024#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002025 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002026 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002027 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002028#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002029 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2030 return NULL;
2031
2032 if (PyString_Check(replobj)) {
2033 repl = PyString_AS_STRING(replobj);
2034 repl_len = PyString_GET_SIZE(replobj);
2035 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002036#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002037 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002038 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002039 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002040#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002041 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2042 return NULL;
2043
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002044 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002045 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046 return NULL;
2047 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002048 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002049 if (new_s == NULL) {
2050 PyErr_NoMemory();
2051 return NULL;
2052 }
2053 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002054 if (PyString_CheckExact(self)) {
2055 /* we're returning another reference to self */
2056 new = (PyObject*)self;
2057 Py_INCREF(new);
2058 }
2059 else {
2060 new = PyString_FromStringAndSize(str, len);
2061 if (new == NULL)
2062 return NULL;
2063 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 }
2065 else {
2066 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002067 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002068 }
2069 return new;
2070}
2071
2072
2073static char startswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002074"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002076Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077optional start, test S beginning at that position. With optional end, stop\n\
2078comparing S at that position.";
2079
2080static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002081string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002083 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086 int plen;
2087 int start = 0;
2088 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090
Guido van Rossumc6821402000-05-08 14:08:05 +00002091 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2092 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002093 return NULL;
2094 if (PyString_Check(subobj)) {
2095 prefix = PyString_AS_STRING(subobj);
2096 plen = PyString_GET_SIZE(subobj);
2097 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002098#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002099 else if (PyUnicode_Check(subobj)) {
2100 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002101 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002102 subobj, start, end, -1);
2103 if (rc == -1)
2104 return NULL;
2105 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002106 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002107 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002108#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 return NULL;
2111
2112 /* adopt Java semantics for index out of range. it is legal for
2113 * offset to be == plen, but this only returns true if prefix is
2114 * the empty string.
2115 */
2116 if (start < 0 || start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002117 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002118
2119 if (!memcmp(str+start, prefix, plen)) {
2120 /* did the match end after the specified end? */
2121 if (end < 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002122 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123 else if (end - start < plen)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002124 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002126 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002128 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129}
2130
2131
2132static char endswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002133"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002135Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136optional start, test S beginning at that position. With optional end, stop\n\
2137comparing S at that position.";
2138
2139static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002140string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002142 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002144 const char* suffix;
2145 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146 int start = 0;
2147 int end = -1;
2148 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002149 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150
Guido van Rossumc6821402000-05-08 14:08:05 +00002151 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2152 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002153 return NULL;
2154 if (PyString_Check(subobj)) {
2155 suffix = PyString_AS_STRING(subobj);
2156 slen = PyString_GET_SIZE(subobj);
2157 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002158#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002159 else if (PyUnicode_Check(subobj)) {
2160 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002161 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002162 subobj, start, end, +1);
2163 if (rc == -1)
2164 return NULL;
2165 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002166 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002167 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002168#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 return NULL;
2171
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172 if (start < 0 || start > len || slen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002173 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174
2175 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002177
Guido van Rossum4c08d552000-03-10 22:55:18 +00002178 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002179 return PyBool_FromLong(1);
2180 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181}
2182
2183
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002184static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002185"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002186\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002187Encodes S using the codec registered for encoding. encoding defaults\n\
2188to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002189handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2190a ValueError. Other possible values are 'ignore' and 'replace'.";
2191
2192static PyObject *
2193string_encode(PyStringObject *self, PyObject *args)
2194{
2195 char *encoding = NULL;
2196 char *errors = NULL;
2197 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2198 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002199 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2200}
2201
2202
2203static char decode__doc__[] =
2204"S.decode([encoding[,errors]]) -> object\n\
2205\n\
2206Decodes S using the codec registered for encoding. encoding defaults\n\
2207to the default encoding. errors may be given to set a different error\n\
2208handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2209a ValueError. Other possible values are 'ignore' and 'replace'.";
2210
2211static PyObject *
2212string_decode(PyStringObject *self, PyObject *args)
2213{
2214 char *encoding = NULL;
2215 char *errors = NULL;
2216 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2217 return NULL;
2218 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002219}
2220
2221
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222static char expandtabs__doc__[] =
2223"S.expandtabs([tabsize]) -> string\n\
2224\n\
2225Return a copy of S where all tab characters are expanded using spaces.\n\
2226If tabsize is not given, a tab size of 8 characters is assumed.";
2227
2228static PyObject*
2229string_expandtabs(PyStringObject *self, PyObject *args)
2230{
2231 const char *e, *p;
2232 char *q;
2233 int i, j;
2234 PyObject *u;
2235 int tabsize = 8;
2236
2237 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2238 return NULL;
2239
Thomas Wouters7e474022000-07-16 12:04:32 +00002240 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241 i = j = 0;
2242 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2243 for (p = PyString_AS_STRING(self); p < e; p++)
2244 if (*p == '\t') {
2245 if (tabsize > 0)
2246 j += tabsize - (j % tabsize);
2247 }
2248 else {
2249 j++;
2250 if (*p == '\n' || *p == '\r') {
2251 i += j;
2252 j = 0;
2253 }
2254 }
2255
2256 /* Second pass: create output string and fill it */
2257 u = PyString_FromStringAndSize(NULL, i + j);
2258 if (!u)
2259 return NULL;
2260
2261 j = 0;
2262 q = PyString_AS_STRING(u);
2263
2264 for (p = PyString_AS_STRING(self); p < e; p++)
2265 if (*p == '\t') {
2266 if (tabsize > 0) {
2267 i = tabsize - (j % tabsize);
2268 j += i;
2269 while (i--)
2270 *q++ = ' ';
2271 }
2272 }
2273 else {
2274 j++;
2275 *q++ = *p;
2276 if (*p == '\n' || *p == '\r')
2277 j = 0;
2278 }
2279
2280 return u;
2281}
2282
Tim Peters8fa5dd02001-09-12 02:18:30 +00002283static PyObject *
2284pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002285{
2286 PyObject *u;
2287
2288 if (left < 0)
2289 left = 0;
2290 if (right < 0)
2291 right = 0;
2292
Tim Peters8fa5dd02001-09-12 02:18:30 +00002293 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294 Py_INCREF(self);
2295 return (PyObject *)self;
2296 }
2297
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002298 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002299 left + PyString_GET_SIZE(self) + right);
2300 if (u) {
2301 if (left)
2302 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002303 memcpy(PyString_AS_STRING(u) + left,
2304 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305 PyString_GET_SIZE(self));
2306 if (right)
2307 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2308 fill, right);
2309 }
2310
2311 return u;
2312}
2313
2314static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002315"S.ljust(width) -> string\n"
2316"\n"
2317"Return S left justified in a string of length width. Padding is\n"
2318"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002319
2320static PyObject *
2321string_ljust(PyStringObject *self, PyObject *args)
2322{
2323 int width;
2324 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2325 return NULL;
2326
Tim Peters8fa5dd02001-09-12 02:18:30 +00002327 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002328 Py_INCREF(self);
2329 return (PyObject*) self;
2330 }
2331
2332 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2333}
2334
2335
2336static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002337"S.rjust(width) -> string\n"
2338"\n"
2339"Return S right justified in a string of length width. Padding is\n"
2340"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341
2342static PyObject *
2343string_rjust(PyStringObject *self, PyObject *args)
2344{
2345 int width;
2346 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2347 return NULL;
2348
Tim Peters8fa5dd02001-09-12 02:18:30 +00002349 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350 Py_INCREF(self);
2351 return (PyObject*) self;
2352 }
2353
2354 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2355}
2356
2357
2358static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002359"S.center(width) -> string\n"
2360"\n"
2361"Return S centered in a string of length width. Padding is done\n"
2362"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002363
2364static PyObject *
2365string_center(PyStringObject *self, PyObject *args)
2366{
2367 int marg, left;
2368 int width;
2369
2370 if (!PyArg_ParseTuple(args, "i:center", &width))
2371 return NULL;
2372
Tim Peters8fa5dd02001-09-12 02:18:30 +00002373 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 Py_INCREF(self);
2375 return (PyObject*) self;
2376 }
2377
2378 marg = width - PyString_GET_SIZE(self);
2379 left = marg / 2 + (marg & width & 1);
2380
2381 return pad(self, left, marg - left, ' ');
2382}
2383
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384static char isspace__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002385"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002386"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002387"Return True if there are only whitespace characters in S,\n"
2388"False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002389
2390static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002391string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392{
Fred Drakeba096332000-07-09 07:04:36 +00002393 register const unsigned char *p
2394 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002395 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002396
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397 /* Shortcut for single character strings */
2398 if (PyString_GET_SIZE(self) == 1 &&
2399 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002400 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002401
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002402 /* Special case for empty strings */
2403 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002404 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002405
Guido van Rossum4c08d552000-03-10 22:55:18 +00002406 e = p + PyString_GET_SIZE(self);
2407 for (; p < e; p++) {
2408 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002409 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002410 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002411 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002412}
2413
2414
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002415static char isalpha__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002416"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002417\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002418Return True if all characters in S are alphabetic\n\
2419and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002420
2421static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002422string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002423{
Fred Drakeba096332000-07-09 07:04:36 +00002424 register const unsigned char *p
2425 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002426 register const unsigned char *e;
2427
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002428 /* Shortcut for single character strings */
2429 if (PyString_GET_SIZE(self) == 1 &&
2430 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002431 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002432
2433 /* Special case for empty strings */
2434 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002435 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002436
2437 e = p + PyString_GET_SIZE(self);
2438 for (; p < e; p++) {
2439 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002440 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002441 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002442 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002443}
2444
2445
2446static char isalnum__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002447"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002448\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002449Return True if all characters in S are alphanumeric\n\
2450and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002451
2452static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002453string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002454{
Fred Drakeba096332000-07-09 07:04:36 +00002455 register const unsigned char *p
2456 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002457 register const unsigned char *e;
2458
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002459 /* Shortcut for single character strings */
2460 if (PyString_GET_SIZE(self) == 1 &&
2461 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002462 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002463
2464 /* Special case for empty strings */
2465 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002466 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002467
2468 e = p + PyString_GET_SIZE(self);
2469 for (; p < e; p++) {
2470 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002471 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002472 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002473 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002474}
2475
2476
Guido van Rossum4c08d552000-03-10 22:55:18 +00002477static char isdigit__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002478"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002479\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002480Return True if there are only digit characters in S,\n\
2481False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002482
2483static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002484string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002485{
Fred Drakeba096332000-07-09 07:04:36 +00002486 register const unsigned char *p
2487 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002488 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002489
Guido van Rossum4c08d552000-03-10 22:55:18 +00002490 /* Shortcut for single character strings */
2491 if (PyString_GET_SIZE(self) == 1 &&
2492 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002493 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002494
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002495 /* Special case for empty strings */
2496 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002497 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002498
Guido van Rossum4c08d552000-03-10 22:55:18 +00002499 e = p + PyString_GET_SIZE(self);
2500 for (; p < e; p++) {
2501 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002502 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002503 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002504 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002505}
2506
2507
2508static char islower__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002509"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002510\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002511Return True if all cased characters in S are lowercase and there is\n\
2512at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513
2514static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002515string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002516{
Fred Drakeba096332000-07-09 07:04:36 +00002517 register const unsigned char *p
2518 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002519 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520 int cased;
2521
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522 /* Shortcut for single character strings */
2523 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002524 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002526 /* Special case for empty strings */
2527 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002528 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002529
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 e = p + PyString_GET_SIZE(self);
2531 cased = 0;
2532 for (; p < e; p++) {
2533 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002534 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002535 else if (!cased && islower(*p))
2536 cased = 1;
2537 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002538 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002539}
2540
2541
2542static char isupper__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002543"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002545Return True if all cased characters in S are uppercase and there is\n\
2546at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002547
2548static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002549string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002550{
Fred Drakeba096332000-07-09 07:04:36 +00002551 register const unsigned char *p
2552 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002553 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002554 int cased;
2555
Guido van Rossum4c08d552000-03-10 22:55:18 +00002556 /* Shortcut for single character strings */
2557 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002558 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002559
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002560 /* Special case for empty strings */
2561 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002562 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002563
Guido van Rossum4c08d552000-03-10 22:55:18 +00002564 e = p + PyString_GET_SIZE(self);
2565 cased = 0;
2566 for (; p < e; p++) {
2567 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002568 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569 else if (!cased && isupper(*p))
2570 cased = 1;
2571 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002572 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573}
2574
2575
2576static char istitle__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002577"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002578\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002579Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580may only follow uncased characters and lowercase characters only cased\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002581ones. Return False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582
2583static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002584string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002585{
Fred Drakeba096332000-07-09 07:04:36 +00002586 register const unsigned char *p
2587 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002588 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002589 int cased, previous_is_cased;
2590
Guido van Rossum4c08d552000-03-10 22:55:18 +00002591 /* Shortcut for single character strings */
2592 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002593 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002595 /* Special case for empty strings */
2596 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002597 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002598
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 e = p + PyString_GET_SIZE(self);
2600 cased = 0;
2601 previous_is_cased = 0;
2602 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002603 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604
2605 if (isupper(ch)) {
2606 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002607 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002608 previous_is_cased = 1;
2609 cased = 1;
2610 }
2611 else if (islower(ch)) {
2612 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002613 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614 previous_is_cased = 1;
2615 cased = 1;
2616 }
2617 else
2618 previous_is_cased = 0;
2619 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002620 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002621}
2622
2623
2624static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002625"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002626\n\
2627Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002628Line breaks are not included in the resulting list unless keepends\n\
2629is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002630
2631#define SPLIT_APPEND(data, left, right) \
2632 str = PyString_FromStringAndSize(data + left, right - left); \
2633 if (!str) \
2634 goto onError; \
2635 if (PyList_Append(list, str)) { \
2636 Py_DECREF(str); \
2637 goto onError; \
2638 } \
2639 else \
2640 Py_DECREF(str);
2641
2642static PyObject*
2643string_splitlines(PyStringObject *self, PyObject *args)
2644{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002645 register int i;
2646 register int j;
2647 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002648 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002649 PyObject *list;
2650 PyObject *str;
2651 char *data;
2652
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002653 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002654 return NULL;
2655
2656 data = PyString_AS_STRING(self);
2657 len = PyString_GET_SIZE(self);
2658
Guido van Rossum4c08d552000-03-10 22:55:18 +00002659 list = PyList_New(0);
2660 if (!list)
2661 goto onError;
2662
2663 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002664 int eol;
2665
Guido van Rossum4c08d552000-03-10 22:55:18 +00002666 /* Find a line and append it */
2667 while (i < len && data[i] != '\n' && data[i] != '\r')
2668 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669
2670 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002671 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002672 if (i < len) {
2673 if (data[i] == '\r' && i + 1 < len &&
2674 data[i+1] == '\n')
2675 i += 2;
2676 else
2677 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002678 if (keepends)
2679 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002680 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002681 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002682 j = i;
2683 }
2684 if (j < len) {
2685 SPLIT_APPEND(data, j, len);
2686 }
2687
2688 return list;
2689
2690 onError:
2691 Py_DECREF(list);
2692 return NULL;
2693}
2694
2695#undef SPLIT_APPEND
2696
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002697
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002698static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002699string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002700 /* Counterparts of the obsolete stropmodule functions; except
2701 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002702 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2703 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2704 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2705 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2706 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2707 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2708 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2709 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2710 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2711 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2712 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2713 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2714 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2715 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2716 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2717 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002718 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002719 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2720 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2721 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002722 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002723 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002724 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002725 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2726 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2727 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2728 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2729 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2730 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2731 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2732 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2733 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2734 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002735#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002736 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002737#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002738 {NULL, NULL} /* sentinel */
2739};
2740
Guido van Rossumae960af2001-08-30 03:11:59 +00002741staticforward PyObject *
2742str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2743
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002744static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002745string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002746{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002747 PyObject *x = NULL;
2748 static char *kwlist[] = {"object", 0};
2749
Guido van Rossumae960af2001-08-30 03:11:59 +00002750 if (type != &PyString_Type)
2751 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002752 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2753 return NULL;
2754 if (x == NULL)
2755 return PyString_FromString("");
2756 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002757}
2758
Guido van Rossumae960af2001-08-30 03:11:59 +00002759static PyObject *
2760str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2761{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002762 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002763 int n;
2764
2765 assert(PyType_IsSubtype(type, &PyString_Type));
2766 tmp = string_new(&PyString_Type, args, kwds);
2767 if (tmp == NULL)
2768 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002769 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002770 n = PyString_GET_SIZE(tmp);
2771 pnew = type->tp_alloc(type, n);
2772 if (pnew != NULL) {
2773 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002774 ((PyStringObject *)pnew)->ob_shash =
2775 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002776 ((PyStringObject *)pnew)->ob_sinterned =
2777 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002778 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002779 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002780 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002781}
2782
Tim Peters6d6c1a32001-08-02 04:15:00 +00002783static char string_doc[] =
2784"str(object) -> string\n\
2785\n\
2786Return a nice string representation of the object.\n\
2787If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002788
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002789PyTypeObject PyString_Type = {
2790 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002791 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002792 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002793 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002794 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002795 (destructor)string_dealloc, /* tp_dealloc */
2796 (printfunc)string_print, /* tp_print */
2797 0, /* tp_getattr */
2798 0, /* tp_setattr */
2799 0, /* tp_compare */
2800 (reprfunc)string_repr, /* tp_repr */
2801 0, /* tp_as_number */
2802 &string_as_sequence, /* tp_as_sequence */
2803 0, /* tp_as_mapping */
2804 (hashfunc)string_hash, /* tp_hash */
2805 0, /* tp_call */
2806 (reprfunc)string_str, /* tp_str */
2807 PyObject_GenericGetAttr, /* tp_getattro */
2808 0, /* tp_setattro */
2809 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002810 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002811 string_doc, /* tp_doc */
2812 0, /* tp_traverse */
2813 0, /* tp_clear */
2814 (richcmpfunc)string_richcompare, /* tp_richcompare */
2815 0, /* tp_weaklistoffset */
2816 0, /* tp_iter */
2817 0, /* tp_iternext */
2818 string_methods, /* tp_methods */
2819 0, /* tp_members */
2820 0, /* tp_getset */
2821 0, /* tp_base */
2822 0, /* tp_dict */
2823 0, /* tp_descr_get */
2824 0, /* tp_descr_set */
2825 0, /* tp_dictoffset */
2826 0, /* tp_init */
2827 0, /* tp_alloc */
2828 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00002829 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002830};
2831
2832void
Fred Drakeba096332000-07-09 07:04:36 +00002833PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002834{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002835 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002836 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002837 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002838 if (w == NULL || !PyString_Check(*pv)) {
2839 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002840 *pv = NULL;
2841 return;
2842 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002843 v = string_concat((PyStringObject *) *pv, w);
2844 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002845 *pv = v;
2846}
2847
Guido van Rossum013142a1994-08-30 08:19:36 +00002848void
Fred Drakeba096332000-07-09 07:04:36 +00002849PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002850{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002851 PyString_Concat(pv, w);
2852 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002853}
2854
2855
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002856/* The following function breaks the notion that strings are immutable:
2857 it changes the size of a string. We get away with this only if there
2858 is only one module referencing the object. You can also think of it
2859 as creating a new string object and destroying the old one, only
2860 more efficiently. In any case, don't use this if the string may
2861 already be known to some other part of the code... */
2862
2863int
Fred Drakeba096332000-07-09 07:04:36 +00002864_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002865{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002866 register PyObject *v;
2867 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002868 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002869 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002870 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002871 Py_DECREF(v);
2872 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002873 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002874 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002875 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002876#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002877 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002878#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002879 _Py_ForgetReference(v);
2880 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00002881 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002882 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002883 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00002884 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002885 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002886 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002887 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002888 _Py_NewReference(*pv);
2889 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002890 sv->ob_size = newsize;
2891 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002892 return 0;
2893}
Guido van Rossume5372401993-03-16 12:15:04 +00002894
2895/* Helpers for formatstring */
2896
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002897static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002898getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002899{
2900 int argidx = *p_argidx;
2901 if (argidx < arglen) {
2902 (*p_argidx)++;
2903 if (arglen < 0)
2904 return args;
2905 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002906 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002907 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002908 PyErr_SetString(PyExc_TypeError,
2909 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002910 return NULL;
2911}
2912
/* Format flags: one bit per flag character that may follow '%' in a
 * format specifier.
 */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
2925
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002926static int
Fred Drakeba096332000-07-09 07:04:36 +00002927formatfloat(char *buf, size_t buflen, int flags,
2928 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002929{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002930 /* fmt = '%#.' + `prec` + `type`
2931 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002932 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002933 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002934 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002935 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002936 if (prec < 0)
2937 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002938 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2939 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00002940 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
2941 (flags&F_ALT) ? "#" : "",
2942 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002943 /* worst case length calc to ensure no buffer overrun:
2944 fmt = %#.<prec>g
2945 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002946 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002947 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2948 If prec=0 the effective precision is 1 (the leading digit is
2949 always given), therefore increase by one to 10+prec. */
2950 if (buflen <= (size_t)10 + (size_t)prec) {
2951 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002952 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002953 return -1;
2954 }
Tim Peters885d4572001-11-28 20:27:42 +00002955 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002956 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002957}
2958
Tim Peters38fd5b62000-09-21 05:43:11 +00002959/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2960 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2961 * Python's regular ints.
2962 * Return value: a new PyString*, or NULL if error.
2963 * . *pbuf is set to point into it,
2964 * *plen set to the # of chars following that.
2965 * Caller must decref it when done using pbuf.
2966 * The string starting at *pbuf is of the form
2967 * "-"? ("0x" | "0X")? digit+
2968 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002969 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002970 * There will be at least prec digits, zero-filled on the left if
2971 * necessary to get that many.
2972 * val object to be converted
2973 * flags bitmask of format flags; only F_ALT is looked at
2974 * prec minimum number of digits; 0-fill on left if needed
2975 * type a character in [duoxX]; u acts the same as d
2976 *
2977 * CAUTION: o, x and X conversions on regular ints can never
2978 * produce a '-' sign, but can for Python's unbounded ints.
2979 */
2980PyObject*
2981_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2982 char **pbuf, int *plen)
2983{
2984 PyObject *result = NULL;
2985 char *buf;
2986 int i;
2987 int sign; /* 1 if '-', else 0 */
2988 int len; /* number of characters */
2989 int numdigits; /* len == numnondigits + numdigits */
2990 int numnondigits = 0;
2991
2992 switch (type) {
2993 case 'd':
2994 case 'u':
2995 result = val->ob_type->tp_str(val);
2996 break;
2997 case 'o':
2998 result = val->ob_type->tp_as_number->nb_oct(val);
2999 break;
3000 case 'x':
3001 case 'X':
3002 numnondigits = 2;
3003 result = val->ob_type->tp_as_number->nb_hex(val);
3004 break;
3005 default:
3006 assert(!"'type' not in [duoxX]");
3007 }
3008 if (!result)
3009 return NULL;
3010
3011 /* To modify the string in-place, there can only be one reference. */
3012 if (result->ob_refcnt != 1) {
3013 PyErr_BadInternalCall();
3014 return NULL;
3015 }
3016 buf = PyString_AsString(result);
3017 len = PyString_Size(result);
3018 if (buf[len-1] == 'L') {
3019 --len;
3020 buf[len] = '\0';
3021 }
3022 sign = buf[0] == '-';
3023 numnondigits += sign;
3024 numdigits = len - numnondigits;
3025 assert(numdigits > 0);
3026
Tim Petersfff53252001-04-12 18:38:48 +00003027 /* Get rid of base marker unless F_ALT */
3028 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003029 /* Need to skip 0x, 0X or 0. */
3030 int skipped = 0;
3031 switch (type) {
3032 case 'o':
3033 assert(buf[sign] == '0');
3034 /* If 0 is only digit, leave it alone. */
3035 if (numdigits > 1) {
3036 skipped = 1;
3037 --numdigits;
3038 }
3039 break;
3040 case 'x':
3041 case 'X':
3042 assert(buf[sign] == '0');
3043 assert(buf[sign + 1] == 'x');
3044 skipped = 2;
3045 numnondigits -= 2;
3046 break;
3047 }
3048 if (skipped) {
3049 buf += skipped;
3050 len -= skipped;
3051 if (sign)
3052 buf[0] = '-';
3053 }
3054 assert(len == numnondigits + numdigits);
3055 assert(numdigits > 0);
3056 }
3057
3058 /* Fill with leading zeroes to meet minimum width. */
3059 if (prec > numdigits) {
3060 PyObject *r1 = PyString_FromStringAndSize(NULL,
3061 numnondigits + prec);
3062 char *b1;
3063 if (!r1) {
3064 Py_DECREF(result);
3065 return NULL;
3066 }
3067 b1 = PyString_AS_STRING(r1);
3068 for (i = 0; i < numnondigits; ++i)
3069 *b1++ = *buf++;
3070 for (i = 0; i < prec - numdigits; i++)
3071 *b1++ = '0';
3072 for (i = 0; i < numdigits; i++)
3073 *b1++ = *buf++;
3074 *b1 = '\0';
3075 Py_DECREF(result);
3076 result = r1;
3077 buf = PyString_AS_STRING(result);
3078 len = numnondigits + prec;
3079 }
3080
3081 /* Fix up case for hex conversions. */
3082 switch (type) {
3083 case 'x':
3084 /* Need to convert all upper case letters to lower case. */
3085 for (i = 0; i < len; i++)
3086 if (buf[i] >= 'A' && buf[i] <= 'F')
3087 buf[i] += 'a'-'A';
3088 break;
3089 case 'X':
3090 /* Need to convert 0x to 0X (and -0x to -0X). */
3091 if (buf[sign + 1] == 'x')
3092 buf[sign + 1] = 'X';
3093 break;
3094 }
3095 *pbuf = buf;
3096 *plen = len;
3097 return result;
3098}
3099
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003100static int
Fred Drakeba096332000-07-09 07:04:36 +00003101formatint(char *buf, size_t buflen, int flags,
3102 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003103{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003104 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003105 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3106 + 1 + 1 = 24 */
3107 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003108 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003109
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003110 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003111 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003112 if (prec < 0)
3113 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003114
3115 if ((flags & F_ALT) &&
3116 (type == 'x' || type == 'X')) {
3117 /* When converting under %#x or %#X, there are a number
3118 * of issues that cause pain:
3119 * - when 0 is being converted, the C standard leaves off
3120 * the '0x' or '0X', which is inconsistent with other
3121 * %#x/%#X conversions and inconsistent with Python's
3122 * hex() function
3123 * - there are platforms that violate the standard and
3124 * convert 0 with the '0x' or '0X'
3125 * (Metrowerks, Compaq Tru64)
3126 * - there are platforms that give '0x' when converting
3127 * under %#X, but convert 0 in accordance with the
3128 * standard (OS/2 EMX)
3129 *
3130 * We can achieve the desired consistency by inserting our
3131 * own '0x' or '0X' prefix, and substituting %x/%X in place
3132 * of %#x/%#X.
3133 *
3134 * Note that this is the same approach as used in
3135 * formatint() in unicodeobject.c
3136 */
3137 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
3138 type, prec, type);
3139 }
3140 else {
3141 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
3142 (flags&F_ALT) ? "#" : "",
3143 prec, type);
3144 }
3145
Tim Peters38fd5b62000-09-21 05:43:11 +00003146 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003147 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3148 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003149 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003150 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003151 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003152 return -1;
3153 }
Tim Peters885d4572001-11-28 20:27:42 +00003154 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003155 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003156}
3157
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003158static int
Fred Drakeba096332000-07-09 07:04:36 +00003159formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003160{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003161 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003162 if (PyString_Check(v)) {
3163 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003164 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003165 }
3166 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003167 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003168 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003169 }
3170 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003171 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003172}
3173
Guido van Rossum013142a1994-08-30 08:19:36 +00003174
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003175/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3176
3177 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3178 chars are formatted. XXX This is a magic number. Each formatting
3179 routine does bounds checking to ensure no overflow, but a better
3180 solution may be to malloc a buffer of appropriate size for each
3181 format. For now, the current solution is sufficient.
3182*/
3183#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003184
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003185PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003186PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003187{
3188 char *fmt, *res;
3189 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003190 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003191 PyObject *result, *orig_args;
3192#ifdef Py_USING_UNICODE
3193 PyObject *v, *w;
3194#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003195 PyObject *dict = NULL;
3196 if (format == NULL || !PyString_Check(format) || args == NULL) {
3197 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003198 return NULL;
3199 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003200 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003201 fmt = PyString_AS_STRING(format);
3202 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003203 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003204 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003205 if (result == NULL)
3206 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003207 res = PyString_AsString(result);
3208 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003209 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003210 argidx = 0;
3211 }
3212 else {
3213 arglen = -1;
3214 argidx = -2;
3215 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003216 if (args->ob_type->tp_as_mapping)
3217 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003218 while (--fmtcnt >= 0) {
3219 if (*fmt != '%') {
3220 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003221 rescnt = fmtcnt + 100;
3222 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003223 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003224 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003225 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003226 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003227 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003228 }
3229 *res++ = *fmt++;
3230 }
3231 else {
3232 /* Got a format specifier */
3233 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003234 int width = -1;
3235 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003236 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003237 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003238 PyObject *v = NULL;
3239 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003240 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003241 int sign;
3242 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003243 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003244#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003245 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003246 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003247#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003248
Guido van Rossumda9c2711996-12-05 21:58:58 +00003249 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003250 if (*fmt == '(') {
3251 char *keystart;
3252 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003253 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003254 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003255
3256 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003257 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003258 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003259 goto error;
3260 }
3261 ++fmt;
3262 --fmtcnt;
3263 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003264 /* Skip over balanced parentheses */
3265 while (pcount > 0 && --fmtcnt >= 0) {
3266 if (*fmt == ')')
3267 --pcount;
3268 else if (*fmt == '(')
3269 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003270 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003271 }
3272 keylen = fmt - keystart - 1;
3273 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003274 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003275 "incomplete format key");
3276 goto error;
3277 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003278 key = PyString_FromStringAndSize(keystart,
3279 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003280 if (key == NULL)
3281 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003282 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003283 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003284 args_owned = 0;
3285 }
3286 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003287 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003288 if (args == NULL) {
3289 goto error;
3290 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003291 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003292 arglen = -1;
3293 argidx = -2;
3294 }
Guido van Rossume5372401993-03-16 12:15:04 +00003295 while (--fmtcnt >= 0) {
3296 switch (c = *fmt++) {
3297 case '-': flags |= F_LJUST; continue;
3298 case '+': flags |= F_SIGN; continue;
3299 case ' ': flags |= F_BLANK; continue;
3300 case '#': flags |= F_ALT; continue;
3301 case '0': flags |= F_ZERO; continue;
3302 }
3303 break;
3304 }
3305 if (c == '*') {
3306 v = getnextarg(args, arglen, &argidx);
3307 if (v == NULL)
3308 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003309 if (!PyInt_Check(v)) {
3310 PyErr_SetString(PyExc_TypeError,
3311 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003312 goto error;
3313 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003314 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003315 if (width < 0) {
3316 flags |= F_LJUST;
3317 width = -width;
3318 }
Guido van Rossume5372401993-03-16 12:15:04 +00003319 if (--fmtcnt >= 0)
3320 c = *fmt++;
3321 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003322 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003323 width = c - '0';
3324 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003325 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003326 if (!isdigit(c))
3327 break;
3328 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003329 PyErr_SetString(
3330 PyExc_ValueError,
3331 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003332 goto error;
3333 }
3334 width = width*10 + (c - '0');
3335 }
3336 }
3337 if (c == '.') {
3338 prec = 0;
3339 if (--fmtcnt >= 0)
3340 c = *fmt++;
3341 if (c == '*') {
3342 v = getnextarg(args, arglen, &argidx);
3343 if (v == NULL)
3344 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003345 if (!PyInt_Check(v)) {
3346 PyErr_SetString(
3347 PyExc_TypeError,
3348 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003349 goto error;
3350 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003351 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003352 if (prec < 0)
3353 prec = 0;
3354 if (--fmtcnt >= 0)
3355 c = *fmt++;
3356 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003357 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003358 prec = c - '0';
3359 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003360 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003361 if (!isdigit(c))
3362 break;
3363 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003364 PyErr_SetString(
3365 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003366 "prec too big");
3367 goto error;
3368 }
3369 prec = prec*10 + (c - '0');
3370 }
3371 }
3372 } /* prec */
3373 if (fmtcnt >= 0) {
3374 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003375 if (--fmtcnt >= 0)
3376 c = *fmt++;
3377 }
3378 }
3379 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003380 PyErr_SetString(PyExc_ValueError,
3381 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003382 goto error;
3383 }
3384 if (c != '%') {
3385 v = getnextarg(args, arglen, &argidx);
3386 if (v == NULL)
3387 goto error;
3388 }
3389 sign = 0;
3390 fill = ' ';
3391 switch (c) {
3392 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003393 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003394 len = 1;
3395 break;
3396 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003397 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003398#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003399 if (PyUnicode_Check(v)) {
3400 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003401 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003402 goto unicode;
3403 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003404#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003405 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003406 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003407 else
3408 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003409 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003410 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003411 if (!PyString_Check(temp)) {
3412 PyErr_SetString(PyExc_TypeError,
3413 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003414 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003415 goto error;
3416 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003417 pbuf = PyString_AS_STRING(temp);
3418 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003419 if (prec >= 0 && len > prec)
3420 len = prec;
3421 break;
3422 case 'i':
3423 case 'd':
3424 case 'u':
3425 case 'o':
3426 case 'x':
3427 case 'X':
3428 if (c == 'i')
3429 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003430 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003431 temp = _PyString_FormatLong(v, flags,
3432 prec, c, &pbuf, &len);
3433 if (!temp)
3434 goto error;
3435 /* unbounded ints can always produce
3436 a sign character! */
3437 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003438 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003439 else {
3440 pbuf = formatbuf;
3441 len = formatint(pbuf, sizeof(formatbuf),
3442 flags, prec, c, v);
3443 if (len < 0)
3444 goto error;
3445 /* only d conversion is signed */
3446 sign = c == 'd';
3447 }
3448 if (flags & F_ZERO)
3449 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003450 break;
3451 case 'e':
3452 case 'E':
3453 case 'f':
3454 case 'g':
3455 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003456 pbuf = formatbuf;
3457 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003458 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003459 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003460 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003461 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003462 fill = '0';
3463 break;
3464 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003465 pbuf = formatbuf;
3466 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003467 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003468 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003469 break;
3470 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003471 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003472 "unsupported format character '%c' (0x%x) "
3473 "at index %i",
3474 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003475 goto error;
3476 }
3477 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003478 if (*pbuf == '-' || *pbuf == '+') {
3479 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003480 len--;
3481 }
3482 else if (flags & F_SIGN)
3483 sign = '+';
3484 else if (flags & F_BLANK)
3485 sign = ' ';
3486 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003487 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003488 }
3489 if (width < len)
3490 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003491 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003492 reslen -= rescnt;
3493 rescnt = width + fmtcnt + 100;
3494 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003495 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003496 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003497 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003498 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003499 }
3500 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003501 if (fill != ' ')
3502 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003503 rescnt--;
3504 if (width > len)
3505 width--;
3506 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003507 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3508 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003509 assert(pbuf[1] == c);
3510 if (fill != ' ') {
3511 *res++ = *pbuf++;
3512 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003513 }
Tim Petersfff53252001-04-12 18:38:48 +00003514 rescnt -= 2;
3515 width -= 2;
3516 if (width < 0)
3517 width = 0;
3518 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003519 }
3520 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003521 do {
3522 --rescnt;
3523 *res++ = fill;
3524 } while (--width > len);
3525 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003526 if (fill == ' ') {
3527 if (sign)
3528 *res++ = sign;
3529 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003530 (c == 'x' || c == 'X')) {
3531 assert(pbuf[0] == '0');
3532 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003533 *res++ = *pbuf++;
3534 *res++ = *pbuf++;
3535 }
3536 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003537 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003538 res += len;
3539 rescnt -= len;
3540 while (--width >= len) {
3541 --rescnt;
3542 *res++ = ' ';
3543 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003544 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003545 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003546 "not all arguments converted");
3547 goto error;
3548 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003549 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003550 } /* '%' */
3551 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003552 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003553 PyErr_SetString(PyExc_TypeError,
3554 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003555 goto error;
3556 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003557 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003558 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003559 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003560 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003561 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003562
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003563#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003564 unicode:
3565 if (args_owned) {
3566 Py_DECREF(args);
3567 args_owned = 0;
3568 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003569 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003570 if (PyTuple_Check(orig_args) && argidx > 0) {
3571 PyObject *v;
3572 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3573 v = PyTuple_New(n);
3574 if (v == NULL)
3575 goto error;
3576 while (--n >= 0) {
3577 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3578 Py_INCREF(w);
3579 PyTuple_SET_ITEM(v, n, w);
3580 }
3581 args = v;
3582 } else {
3583 Py_INCREF(orig_args);
3584 args = orig_args;
3585 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003586 args_owned = 1;
3587 /* Take what we have of the result and let the Unicode formatting
3588 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003589 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003590 if (_PyString_Resize(&result, rescnt))
3591 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003592 fmtcnt = PyString_GET_SIZE(format) - \
3593 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003594 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3595 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003596 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003597 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003598 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003599 if (v == NULL)
3600 goto error;
3601 /* Paste what we have (result) to what the Unicode formatting
3602 function returned (v) and return the result (or error) */
3603 w = PyUnicode_Concat(result, v);
3604 Py_DECREF(result);
3605 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003606 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003607 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003608#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003609
Guido van Rossume5372401993-03-16 12:15:04 +00003610 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003611 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003612 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003613 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003614 }
Guido van Rossume5372401993-03-16 12:15:04 +00003615 return NULL;
3616}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003617
3618
Guido van Rossum2a61e741997-01-18 07:55:05 +00003619
Barry Warsaw4df762f2000-08-16 23:41:01 +00003620/* This dictionary will leak at PyString_Fini() time. That's acceptable
3621 * because PyString_Fini() specifically frees interned strings that are
3622 * only referenced by this dictionary. The CVS log entry for revision 2.45
3623 * says:
3624 *
3625 * Change the Fini function to only remove otherwise unreferenced
3626 * strings from the interned table. There are references in
3627 * hard-to-find static variables all over the interpreter, and it's not
3628 * worth trying to get rid of all those; but "uninterning" isn't fair
3629 * either and may cause subtle failures later -- so we have to keep them
3630 * in the interned table.
3631 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003632static PyObject *interned;
3633
3634void
Fred Drakeba096332000-07-09 07:04:36 +00003635PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003636{
3637 register PyStringObject *s = (PyStringObject *)(*p);
3638 PyObject *t;
3639 if (s == NULL || !PyString_Check(s))
3640 Py_FatalError("PyString_InternInPlace: strings only please!");
3641 if ((t = s->ob_sinterned) != NULL) {
3642 if (t == (PyObject *)s)
3643 return;
3644 Py_INCREF(t);
3645 *p = t;
3646 Py_DECREF(s);
3647 return;
3648 }
3649 if (interned == NULL) {
3650 interned = PyDict_New();
3651 if (interned == NULL)
3652 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003653 }
3654 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3655 Py_INCREF(t);
3656 *p = s->ob_sinterned = t;
3657 Py_DECREF(s);
3658 return;
3659 }
Tim Peters111f6092001-09-12 07:54:51 +00003660 /* Ensure that only true string objects appear in the intern dict,
3661 and as the value of ob_sinterned. */
3662 if (PyString_CheckExact(s)) {
3663 t = (PyObject *)s;
3664 if (PyDict_SetItem(interned, t, t) == 0) {
3665 s->ob_sinterned = t;
3666 return;
3667 }
3668 }
3669 else {
3670 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3671 PyString_GET_SIZE(s));
3672 if (t != NULL) {
3673 if (PyDict_SetItem(interned, t, t) == 0) {
3674 *p = s->ob_sinterned = t;
3675 Py_DECREF(s);
3676 return;
3677 }
3678 Py_DECREF(t);
3679 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003680 }
3681 PyErr_Clear();
3682}
3683
3684
3685PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003686PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003687{
3688 PyObject *s = PyString_FromString(cp);
3689 if (s == NULL)
3690 return NULL;
3691 PyString_InternInPlace(&s);
3692 return s;
3693}
3694
Guido van Rossum8cf04761997-08-02 02:57:45 +00003695void
Fred Drakeba096332000-07-09 07:04:36 +00003696PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003697{
3698 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003699 for (i = 0; i < UCHAR_MAX + 1; i++) {
3700 Py_XDECREF(characters[i]);
3701 characters[i] = NULL;
3702 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003703 Py_XDECREF(nullstring);
3704 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003705 if (interned) {
3706 int pos, changed;
3707 PyObject *key, *value;
3708 do {
3709 changed = 0;
3710 pos = 0;
3711 while (PyDict_Next(interned, &pos, &key, &value)) {
3712 if (key->ob_refcnt == 2 && key == value) {
3713 PyDict_DelItem(interned, key);
3714 changed = 1;
3715 }
3716 }
3717 } while (changed);
3718 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003719}
Barry Warsawa903ad982001-02-23 16:40:48 +00003720
Barry Warsawa903ad982001-02-23 16:40:48 +00003721void _Py_ReleaseInternedStrings(void)
3722{
3723 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003724 fprintf(stderr, "releasing interned strings\n");
3725 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003726 Py_DECREF(interned);
3727 interned = NULL;
3728 }
3729}