blob: 8ae9407476e8e4af6f809156442e1b522bbba7cf [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
Guido van Rossum45ec02a2002-08-19 21:43:18 +000018/* This dictionary holds all interned strings. Note that references to
19 strings in this dictionary are *not* counted in the string's ob_refcnt.
20 When the interned string reaches a refcnt of 0 the string deallocation
21 function will delete the reference from this dictionary.
22
23 Another way to look at this is that to say that the actual reference
24 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
25*/
26static PyObject *interned;
27
28
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000029/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For both PyString_FromString() and PyString_FromStringAndSize(), the
31 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000035 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 For PyString_FromStringAndSize(), the parameter the parameter `str' is
38 either NULL or else points to a string containing at least `size' bytes.
39 For PyString_FromStringAndSize(), the string in the `str' parameter does
40 not have to be null-terminated. (Therefore it is safe to construct a
41 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
42 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
43 bytes (setting the last byte to the null terminating character) and you can
44 fill in the data yourself. If `str' is non-NULL then the resulting
45 PyString object must be treated as immutable and you must not fill in nor
46 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000047
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000048 The PyObject member `op->ob_size', which denotes the number of "extra
49 items" in a variable-size object, will contain the number of bytes
50 allocated for string data, not counting the null terminating character. It
51 is therefore equal to the equal to the `size' parameter (for
52 PyString_FromStringAndSize()) or the length of the string in the `str'
53 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000056PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000057{
Tim Peters9e897f42001-05-09 07:37:07 +000058 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000059 if (size == 0 && (op = nullstring) != NULL) {
60#ifdef COUNT_ALLOCS
61 null_strings++;
62#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 Py_INCREF(op);
64 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 if (size == 1 && str != NULL &&
67 (op = characters[*str & UCHAR_MAX]) != NULL)
68 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000069#ifdef COUNT_ALLOCS
70 one_strings++;
71#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000072 Py_INCREF(op);
73 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000074 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000075
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000076 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000078 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000079 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000081 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000082 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000083 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000084 if (str != NULL)
85 memcpy(op->ob_sval, str, size);
86 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000087 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000089 PyObject *t = (PyObject *)op;
90 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000091 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000092 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000095 PyObject *t = (PyObject *)op;
96 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000097 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000098 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000099 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102}
103
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000105PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000106{
Tim Peters62de65b2001-12-06 20:29:32 +0000107 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000108 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000109
110 assert(str != NULL);
111 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000112 if (size > INT_MAX) {
113 PyErr_SetString(PyExc_OverflowError,
114 "string is too long for a Python string");
115 return NULL;
116 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000117 if (size == 0 && (op = nullstring) != NULL) {
118#ifdef COUNT_ALLOCS
119 null_strings++;
120#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000121 Py_INCREF(op);
122 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000123 }
124 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
125#ifdef COUNT_ALLOCS
126 one_strings++;
127#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000128 Py_INCREF(op);
129 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000131
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000132 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000133 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000134 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000135 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000137 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000139 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000140 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000141 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000143 PyObject *t = (PyObject *)op;
144 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000145 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000146 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000149 PyObject *t = (PyObject *)op;
150 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000151 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000152 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000153 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000155 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000156}
157
Barry Warsawdadace02001-08-24 18:32:06 +0000158PyObject *
159PyString_FromFormatV(const char *format, va_list vargs)
160{
Tim Petersc15c4f12001-10-02 21:32:07 +0000161 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000162 int n = 0;
163 const char* f;
164 char *s;
165 PyObject* string;
166
Tim Petersc15c4f12001-10-02 21:32:07 +0000167#ifdef VA_LIST_IS_ARRAY
168 memcpy(count, vargs, sizeof(va_list));
169#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#ifdef __va_copy
171 __va_copy(count, vargs);
172#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000173 count = vargs;
174#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000175#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000176 /* step 1: figure out how large a buffer we need */
177 for (f = format; *f; f++) {
178 if (*f == '%') {
179 const char* p = f;
180 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
181 ;
182
183 /* skip the 'l' in %ld, since it doesn't change the
184 width. although only %d is supported (see
185 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000186 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 if (*f == 'l' && *(f+1) == 'd')
188 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000189
Barry Warsawdadace02001-08-24 18:32:06 +0000190 switch (*f) {
191 case 'c':
192 (void)va_arg(count, int);
193 /* fall through... */
194 case '%':
195 n++;
196 break;
197 case 'd': case 'i': case 'x':
198 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000199 /* 20 bytes is enough to hold a 64-bit
200 integer. Decimal takes the most space.
201 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000202 n += 20;
203 break;
204 case 's':
205 s = va_arg(count, char*);
206 n += strlen(s);
207 break;
208 case 'p':
209 (void) va_arg(count, int);
210 /* maximum 64-bit pointer representation:
211 * 0xffffffffffffffff
212 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000213 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000214 */
215 n += 19;
216 break;
217 default:
218 /* if we stumble upon an unknown
219 formatting code, copy the rest of
220 the format string to the output
221 string. (we cannot just skip the
222 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000223 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000224 n += strlen(p);
225 goto expand;
226 }
227 } else
228 n++;
229 }
230 expand:
231 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000232 /* Since we've analyzed how much space we need for the worst case,
233 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000234 string = PyString_FromStringAndSize(NULL, n);
235 if (!string)
236 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000237
Barry Warsawdadace02001-08-24 18:32:06 +0000238 s = PyString_AsString(string);
239
240 for (f = format; *f; f++) {
241 if (*f == '%') {
242 const char* p = f++;
243 int i, longflag = 0;
244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000263
Barry Warsawdadace02001-08-24 18:32:06 +0000264 switch (*f) {
265 case 'c':
266 *s++ = va_arg(vargs, int);
267 break;
268 case 'd':
269 if (longflag)
270 sprintf(s, "%ld", va_arg(vargs, long));
271 else
272 sprintf(s, "%d", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'i':
276 sprintf(s, "%i", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 'x':
280 sprintf(s, "%x", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 's':
284 p = va_arg(vargs, char*);
285 i = strlen(p);
286 if (n > 0 && i > n)
287 i = n;
288 memcpy(s, p, i);
289 s += i;
290 break;
291 case 'p':
292 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000293 /* %p is ill-defined: ensure leading 0x. */
294 if (s[1] == 'X')
295 s[1] = 'x';
296 else if (s[1] != 'x') {
297 memmove(s+2, s, strlen(s)+1);
298 s[0] = '0';
299 s[1] = 'x';
300 }
Barry Warsawdadace02001-08-24 18:32:06 +0000301 s += strlen(s);
302 break;
303 case '%':
304 *s++ = '%';
305 break;
306 default:
307 strcpy(s, p);
308 s += strlen(s);
309 goto end;
310 }
311 } else
312 *s++ = *f;
313 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000314
Barry Warsawdadace02001-08-24 18:32:06 +0000315 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000317 return string;
318}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000319
Barry Warsawdadace02001-08-24 18:32:06 +0000320PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000321PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000322{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000323 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000324 va_list vargs;
325
326#ifdef HAVE_STDARG_PROTOTYPES
327 va_start(vargs, format);
328#else
329 va_start(vargs);
330#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 ret = PyString_FromFormatV(format, vargs);
332 va_end(vargs);
333 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000334}
335
336
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000337PyObject *PyString_Decode(const char *s,
338 int size,
339 const char *encoding,
340 const char *errors)
341{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000342 PyObject *v, *str;
343
344 str = PyString_FromStringAndSize(s, size);
345 if (str == NULL)
346 return NULL;
347 v = PyString_AsDecodedString(str, encoding, errors);
348 Py_DECREF(str);
349 return v;
350}
351
352PyObject *PyString_AsDecodedObject(PyObject *str,
353 const char *encoding,
354 const char *errors)
355{
356 PyObject *v;
357
358 if (!PyString_Check(str)) {
359 PyErr_BadArgument();
360 goto onError;
361 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000362
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363 if (encoding == NULL) {
364#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000365 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000366#else
367 PyErr_SetString(PyExc_ValueError, "no encoding specified");
368 goto onError;
369#endif
370 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371
372 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 v = PyCodec_Decode(str, encoding, errors);
374 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376
377 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000378
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000380 return NULL;
381}
382
383PyObject *PyString_AsDecodedString(PyObject *str,
384 const char *encoding,
385 const char *errors)
386{
387 PyObject *v;
388
389 v = PyString_AsDecodedObject(str, encoding, errors);
390 if (v == NULL)
391 goto onError;
392
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000393#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394 /* Convert Unicode to a string using the default encoding */
395 if (PyUnicode_Check(v)) {
396 PyObject *temp = v;
397 v = PyUnicode_AsEncodedString(v, NULL, NULL);
398 Py_DECREF(temp);
399 if (v == NULL)
400 goto onError;
401 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000402#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 if (!PyString_Check(v)) {
404 PyErr_Format(PyExc_TypeError,
405 "decoder did not return a string object (type=%.400s)",
406 v->ob_type->tp_name);
407 Py_DECREF(v);
408 goto onError;
409 }
410
411 return v;
412
413 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000414 return NULL;
415}
416
417PyObject *PyString_Encode(const char *s,
418 int size,
419 const char *encoding,
420 const char *errors)
421{
422 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000423
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 str = PyString_FromStringAndSize(s, size);
425 if (str == NULL)
426 return NULL;
427 v = PyString_AsEncodedString(str, encoding, errors);
428 Py_DECREF(str);
429 return v;
430}
431
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000432PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000433 const char *encoding,
434 const char *errors)
435{
436 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000438 if (!PyString_Check(str)) {
439 PyErr_BadArgument();
440 goto onError;
441 }
442
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000443 if (encoding == NULL) {
444#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000445 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000446#else
447 PyErr_SetString(PyExc_ValueError, "no encoding specified");
448 goto onError;
449#endif
450 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451
452 /* Encode via the codec registry */
453 v = PyCodec_Encode(str, encoding, errors);
454 if (v == NULL)
455 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000456
457 return v;
458
459 onError:
460 return NULL;
461}
462
463PyObject *PyString_AsEncodedString(PyObject *str,
464 const char *encoding,
465 const char *errors)
466{
467 PyObject *v;
468
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000469 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000470 if (v == NULL)
471 goto onError;
472
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000473#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474 /* Convert Unicode to a string using the default encoding */
475 if (PyUnicode_Check(v)) {
476 PyObject *temp = v;
477 v = PyUnicode_AsEncodedString(v, NULL, NULL);
478 Py_DECREF(temp);
479 if (v == NULL)
480 goto onError;
481 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000482#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000483 if (!PyString_Check(v)) {
484 PyErr_Format(PyExc_TypeError,
485 "encoder did not return a string object (type=%.400s)",
486 v->ob_type->tp_name);
487 Py_DECREF(v);
488 goto onError;
489 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000490
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000492
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000493 onError:
494 return NULL;
495}
496
Guido van Rossum234f9421993-06-17 12:35:49 +0000497static void
Fred Drakeba096332000-07-09 07:04:36 +0000498string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000499{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000500 switch (PyString_CHECK_INTERNED(op)) {
501 case SSTATE_NOT_INTERNED:
502 break;
503
504 case SSTATE_INTERNED_MORTAL:
505 /* revive dead object temporarily for DelItem */
506 op->ob_refcnt = 3;
507 if (PyDict_DelItem(interned, op) != 0)
508 Py_FatalError(
509 "deletion of interned string failed");
510 break;
511
512 case SSTATE_INTERNED_IMMORTAL:
513 Py_FatalError("Immortal interned string died.");
514
515 default:
516 Py_FatalError("Inconsistent interned string state.");
517 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000518 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000519}
520
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000521/* Unescape a backslash-escaped string. If unicode is non-zero,
522 the string is a u-literal. If recode_encoding is non-zero,
523 the string is UTF-8 encoded and should be re-encoded in the
524 specified encoding. */
525
526PyObject *PyString_DecodeEscape(const char *s,
527 int len,
528 const char *errors,
529 int unicode,
530 const char *recode_encoding)
531{
532 int c;
533 char *p, *buf;
534 const char *end;
535 PyObject *v;
536 v = PyString_FromStringAndSize((char *)NULL,
537 recode_encoding ? 4*len:len);
538 if (v == NULL)
539 return NULL;
540 p = buf = PyString_AsString(v);
541 end = s + len;
542 while (s < end) {
543 if (*s != '\\') {
544#ifdef Py_USING_UNICODE
545 if (recode_encoding && (*s & 0x80)) {
546 PyObject *u, *w;
547 char *r;
548 const char* t;
549 int rn;
550 t = s;
551 /* Decode non-ASCII bytes as UTF-8. */
552 while (t < end && (*t & 0x80)) t++;
553 u = PyUnicode_DecodeUTF8(s, t - s, errors);
554 if(!u) goto failed;
555
556 /* Recode them in target encoding. */
557 w = PyUnicode_AsEncodedString(
558 u, recode_encoding, errors);
559 Py_DECREF(u);
560 if (!w) goto failed;
561
562 /* Append bytes to output buffer. */
563 r = PyString_AsString(w);
564 rn = PyString_Size(w);
565 memcpy(p, r, rn);
566 p += rn;
567 Py_DECREF(w);
568 s = t;
569 } else {
570 *p++ = *s++;
571 }
572#else
573 *p++ = *s++;
574#endif
575 continue;
576 }
577 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000578 if (s==end) {
579 PyErr_SetString(PyExc_ValueError,
580 "Trailing \\ in string");
581 goto failed;
582 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000583 switch (*s++) {
584 /* XXX This assumes ASCII! */
585 case '\n': break;
586 case '\\': *p++ = '\\'; break;
587 case '\'': *p++ = '\''; break;
588 case '\"': *p++ = '\"'; break;
589 case 'b': *p++ = '\b'; break;
590 case 'f': *p++ = '\014'; break; /* FF */
591 case 't': *p++ = '\t'; break;
592 case 'n': *p++ = '\n'; break;
593 case 'r': *p++ = '\r'; break;
594 case 'v': *p++ = '\013'; break; /* VT */
595 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
596 case '0': case '1': case '2': case '3':
597 case '4': case '5': case '6': case '7':
598 c = s[-1] - '0';
599 if ('0' <= *s && *s <= '7') {
600 c = (c<<3) + *s++ - '0';
601 if ('0' <= *s && *s <= '7')
602 c = (c<<3) + *s++ - '0';
603 }
604 *p++ = c;
605 break;
606 case 'x':
607 if (isxdigit(Py_CHARMASK(s[0]))
608 && isxdigit(Py_CHARMASK(s[1]))) {
609 unsigned int x = 0;
610 c = Py_CHARMASK(*s);
611 s++;
612 if (isdigit(c))
613 x = c - '0';
614 else if (islower(c))
615 x = 10 + c - 'a';
616 else
617 x = 10 + c - 'A';
618 x = x << 4;
619 c = Py_CHARMASK(*s);
620 s++;
621 if (isdigit(c))
622 x += c - '0';
623 else if (islower(c))
624 x += 10 + c - 'a';
625 else
626 x += 10 + c - 'A';
627 *p++ = x;
628 break;
629 }
630 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 PyErr_SetString(PyExc_ValueError,
632 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000633 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000634 }
635 if (strcmp(errors, "replace") == 0) {
636 *p++ = '?';
637 } else if (strcmp(errors, "ignore") == 0)
638 /* do nothing */;
639 else {
640 PyErr_Format(PyExc_ValueError,
641 "decoding error; "
642 "unknown error handling code: %.400s",
643 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000644 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000645 }
646#ifndef Py_USING_UNICODE
647 case 'u':
648 case 'U':
649 case 'N':
650 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "Unicode escapes not legal "
653 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656#endif
657 default:
658 *p++ = '\\';
659 *p++ = s[-1];
660 break;
661 }
662 }
663 _PyString_Resize(&v, (int)(p - buf));
664 return v;
665 failed:
666 Py_DECREF(v);
667 return NULL;
668}
669
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000670static int
671string_getsize(register PyObject *op)
672{
673 char *s;
674 int len;
675 if (PyString_AsStringAndSize(op, &s, &len))
676 return -1;
677 return len;
678}
679
680static /*const*/ char *
681string_getbuffer(register PyObject *op)
682{
683 char *s;
684 int len;
685 if (PyString_AsStringAndSize(op, &s, &len))
686 return NULL;
687 return s;
688}
689
Guido van Rossumd7047b31995-01-02 19:07:15 +0000690int
Fred Drakeba096332000-07-09 07:04:36 +0000691PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000692{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000693 if (!PyString_Check(op))
694 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000695 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000696}
697
698/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000699PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000700{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000701 if (!PyString_Check(op))
702 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000703 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704}
705
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000706int
707PyString_AsStringAndSize(register PyObject *obj,
708 register char **s,
709 register int *len)
710{
711 if (s == NULL) {
712 PyErr_BadInternalCall();
713 return -1;
714 }
715
716 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000717#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (PyUnicode_Check(obj)) {
719 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
720 if (obj == NULL)
721 return -1;
722 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000723 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000724#endif
725 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 PyErr_Format(PyExc_TypeError,
727 "expected string or Unicode object, "
728 "%.200s found", obj->ob_type->tp_name);
729 return -1;
730 }
731 }
732
733 *s = PyString_AS_STRING(obj);
734 if (len != NULL)
735 *len = PyString_GET_SIZE(obj);
736 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
737 PyErr_SetString(PyExc_TypeError,
738 "expected string without null bytes");
739 return -1;
740 }
741 return 0;
742}
743
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000744/* Methods */
745
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000746static int
Fred Drakeba096332000-07-09 07:04:36 +0000747string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748{
749 int i;
750 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000751 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000752
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000753 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000754 if (! PyString_CheckExact(op)) {
755 int ret;
756 /* A str subclass may have its own __str__ method. */
757 op = (PyStringObject *) PyObject_Str((PyObject *)op);
758 if (op == NULL)
759 return -1;
760 ret = string_print(op, fp, flags);
761 Py_DECREF(op);
762 return ret;
763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000764 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000765 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000766 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000767 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000768
Thomas Wouters7e474022000-07-16 12:04:32 +0000769 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000770 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000771 if (memchr(op->ob_sval, '\'', op->ob_size) &&
772 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 quote = '"';
774
775 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000776 for (i = 0; i < op->ob_size; i++) {
777 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000778 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000780 else if (c == '\t')
781 fprintf(fp, "\\t");
782 else if (c == '\n')
783 fprintf(fp, "\\n");
784 else if (c == '\r')
785 fprintf(fp, "\\r");
786 else if (c < ' ' || c >= 0x7f)
787 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000788 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000789 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000791 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000792 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793}
794
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000795PyObject *
796PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000798 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000799 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
800 PyObject *v;
801 if (newsize > INT_MAX) {
802 PyErr_SetString(PyExc_OverflowError,
803 "string is too large to make repr");
804 }
805 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000806 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000807 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808 }
809 else {
810 register int i;
811 register char c;
812 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000813 int quote;
814
Thomas Wouters7e474022000-07-16 12:04:32 +0000815 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000817 if (smartquotes &&
818 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000819 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000820 quote = '"';
821
Tim Peters9161c8b2001-12-03 01:55:38 +0000822 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000824 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000825 /* There's at least enough room for a hex escape
826 and a closing quote. */
827 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000831 else if (c == '\t')
832 *p++ = '\\', *p++ = 't';
833 else if (c == '\n')
834 *p++ = '\\', *p++ = 'n';
835 else if (c == '\r')
836 *p++ = '\\', *p++ = 'r';
837 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000838 /* For performance, we don't want to call
839 PyOS_snprintf here (extra layers of
840 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000841 sprintf(p, "\\x%02x", c & 0xff);
842 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843 }
844 else
845 *p++ = c;
846 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000847 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000848 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000850 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000851 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000852 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000853 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854}
855
Guido van Rossum189f1df2001-05-01 16:51:53 +0000856static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000857string_repr(PyObject *op)
858{
859 return PyString_Repr(op, 1);
860}
861
862static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000863string_str(PyObject *s)
864{
Tim Petersc9933152001-10-16 20:18:24 +0000865 assert(PyString_Check(s));
866 if (PyString_CheckExact(s)) {
867 Py_INCREF(s);
868 return s;
869 }
870 else {
871 /* Subtype -- return genuine string with the same value. */
872 PyStringObject *t = (PyStringObject *) s;
873 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
874 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000875}
876
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877static int
Fred Drakeba096332000-07-09 07:04:36 +0000878string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879{
880 return a->ob_size;
881}
882
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000883static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000884string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000885{
886 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000887 register PyStringObject *op;
888 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000889#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000890 if (PyUnicode_Check(bb))
891 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000892#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000893 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000894 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000895 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 return NULL;
897 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000898#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000900 if ((a->ob_size == 0 || b->ob_size == 0) &&
901 PyString_CheckExact(a) && PyString_CheckExact(b)) {
902 if (a->ob_size == 0) {
903 Py_INCREF(bb);
904 return bb;
905 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000906 Py_INCREF(a);
907 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000908 }
909 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000910 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000911 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000912 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000913 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000915 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000917 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
919 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
920 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922#undef b
923}
924
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000925static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000926string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000927{
928 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000929 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000931 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000932 if (n < 0)
933 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000934 /* watch out for overflows: the size can overflow int,
935 * and the # of bytes needed can overflow size_t
936 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000938 if (n && size / n != a->ob_size) {
939 PyErr_SetString(PyExc_OverflowError,
940 "repeated string is too long");
941 return NULL;
942 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000943 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944 Py_INCREF(a);
945 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946 }
Tim Peters8f422462000-09-09 06:13:41 +0000947 nbytes = size * sizeof(char);
948 if (nbytes / sizeof(char) != (size_t)size ||
949 nbytes + sizeof(PyStringObject) <= nbytes) {
950 PyErr_SetString(PyExc_OverflowError,
951 "repeated string is too long");
952 return NULL;
953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000954 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000955 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000956 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000958 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000959 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000960 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000961 for (i = 0; i < size; i += a->ob_size)
962 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
963 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965}
966
967/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
968
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000970string_slice(register PyStringObject *a, register int i, register int j)
971 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000972{
973 if (i < 0)
974 i = 0;
975 if (j < 0)
976 j = 0; /* Avoid signed/unsigned bug in next line */
977 if (j > a->ob_size)
978 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000979 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
980 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981 Py_INCREF(a);
982 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000983 }
984 if (j < i)
985 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000986 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987}
988
Guido van Rossum9284a572000-03-07 15:53:43 +0000989static int
Fred Drakeba096332000-07-09 07:04:36 +0000990string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000991{
Barry Warsaw817918c2002-08-06 16:58:21 +0000992 const char *lhs, *rhs, *end;
993 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000994
995 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000996#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +0000997 if (PyUnicode_Check(el))
998 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000999#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001000 if (!PyString_Check(el)) {
1001 PyErr_SetString(PyExc_TypeError,
1002 "'in <string>' requires string as left operand");
1003 return -1;
1004 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001005 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001006 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001007 rhs = PyString_AS_STRING(el);
1008 lhs = PyString_AS_STRING(a);
1009
1010 /* optimize for a single character */
1011 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001012 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001013
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001015 while (lhs <= end) {
1016 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001017 return 1;
1018 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001019
Guido van Rossum9284a572000-03-07 15:53:43 +00001020 return 0;
1021}
1022
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001024string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001025{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001026 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001027 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001029 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001030 return NULL;
1031 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001032 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001033 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001034 if (v == NULL)
1035 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001036 else {
1037#ifdef COUNT_ALLOCS
1038 one_strings++;
1039#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001040 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001041 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001042 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001043}
1044
Martin v. Löwiscd353062001-05-24 16:56:35 +00001045static PyObject*
1046string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001047{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001048 int c;
1049 int len_a, len_b;
1050 int min_len;
1051 PyObject *result;
1052
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001053 /* Make sure both arguments are strings. */
1054 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001055 result = Py_NotImplemented;
1056 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001057 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001058 if (a == b) {
1059 switch (op) {
1060 case Py_EQ:case Py_LE:case Py_GE:
1061 result = Py_True;
1062 goto out;
1063 case Py_NE:case Py_LT:case Py_GT:
1064 result = Py_False;
1065 goto out;
1066 }
1067 }
1068 if (op == Py_EQ) {
1069 /* Supporting Py_NE here as well does not save
1070 much time, since Py_NE is rarely used. */
1071 if (a->ob_size == b->ob_size
1072 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001073 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074 a->ob_size) == 0)) {
1075 result = Py_True;
1076 } else {
1077 result = Py_False;
1078 }
1079 goto out;
1080 }
1081 len_a = a->ob_size; len_b = b->ob_size;
1082 min_len = (len_a < len_b) ? len_a : len_b;
1083 if (min_len > 0) {
1084 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1085 if (c==0)
1086 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1087 }else
1088 c = 0;
1089 if (c == 0)
1090 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1091 switch (op) {
1092 case Py_LT: c = c < 0; break;
1093 case Py_LE: c = c <= 0; break;
1094 case Py_EQ: assert(0); break; /* unreachable */
1095 case Py_NE: c = c != 0; break;
1096 case Py_GT: c = c > 0; break;
1097 case Py_GE: c = c >= 0; break;
1098 default:
1099 result = Py_NotImplemented;
1100 goto out;
1101 }
1102 result = c ? Py_True : Py_False;
1103 out:
1104 Py_INCREF(result);
1105 return result;
1106}
1107
1108int
1109_PyString_Eq(PyObject *o1, PyObject *o2)
1110{
1111 PyStringObject *a, *b;
1112 a = (PyStringObject*)o1;
1113 b = (PyStringObject*)o2;
1114 return a->ob_size == b->ob_size
1115 && *a->ob_sval == *b->ob_sval
1116 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001117}
1118
Guido van Rossum9bfef441993-03-29 10:43:31 +00001119static long
Fred Drakeba096332000-07-09 07:04:36 +00001120string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001121{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001122 register int len;
1123 register unsigned char *p;
1124 register long x;
1125
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001126 if (a->ob_shash != -1)
1127 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001128 len = a->ob_size;
1129 p = (unsigned char *) a->ob_sval;
1130 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001131 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001132 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001133 x ^= a->ob_size;
1134 if (x == -1)
1135 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001136 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001137 return x;
1138}
1139
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001140static PyObject*
1141string_subscript(PyStringObject* self, PyObject* item)
1142{
1143 if (PyInt_Check(item)) {
1144 long i = PyInt_AS_LONG(item);
1145 if (i < 0)
1146 i += PyString_GET_SIZE(self);
1147 return string_item(self,i);
1148 }
1149 else if (PyLong_Check(item)) {
1150 long i = PyLong_AsLong(item);
1151 if (i == -1 && PyErr_Occurred())
1152 return NULL;
1153 if (i < 0)
1154 i += PyString_GET_SIZE(self);
1155 return string_item(self,i);
1156 }
1157 else if (PySlice_Check(item)) {
1158 int start, stop, step, slicelength, cur, i;
1159 char* source_buf;
1160 char* result_buf;
1161 PyObject* result;
1162
1163 if (PySlice_GetIndicesEx((PySliceObject*)item,
1164 PyString_GET_SIZE(self),
1165 &start, &stop, &step, &slicelength) < 0) {
1166 return NULL;
1167 }
1168
1169 if (slicelength <= 0) {
1170 return PyString_FromStringAndSize("", 0);
1171 }
1172 else {
1173 source_buf = PyString_AsString((PyObject*)self);
1174 result_buf = PyMem_Malloc(slicelength);
1175
1176 for (cur = start, i = 0; i < slicelength;
1177 cur += step, i++) {
1178 result_buf[i] = source_buf[cur];
1179 }
1180
1181 result = PyString_FromStringAndSize(result_buf,
1182 slicelength);
1183 PyMem_Free(result_buf);
1184 return result;
1185 }
1186 }
1187 else {
1188 PyErr_SetString(PyExc_TypeError,
1189 "string indices must be integers");
1190 return NULL;
1191 }
1192}
1193
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001194static int
Fred Drakeba096332000-07-09 07:04:36 +00001195string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001196{
1197 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001198 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001199 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001200 return -1;
1201 }
1202 *ptr = (void *)self->ob_sval;
1203 return self->ob_size;
1204}
1205
1206static int
Fred Drakeba096332000-07-09 07:04:36 +00001207string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001208{
Guido van Rossum045e6881997-09-08 18:30:11 +00001209 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001210 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001211 return -1;
1212}
1213
1214static int
Fred Drakeba096332000-07-09 07:04:36 +00001215string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216{
1217 if ( lenp )
1218 *lenp = self->ob_size;
1219 return 1;
1220}
1221
Guido van Rossum1db70701998-10-08 02:18:52 +00001222static int
Fred Drakeba096332000-07-09 07:04:36 +00001223string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001224{
1225 if ( index != 0 ) {
1226 PyErr_SetString(PyExc_SystemError,
1227 "accessing non-existent string segment");
1228 return -1;
1229 }
1230 *ptr = self->ob_sval;
1231 return self->ob_size;
1232}
1233
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001234static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001235 (inquiry)string_length, /*sq_length*/
1236 (binaryfunc)string_concat, /*sq_concat*/
1237 (intargfunc)string_repeat, /*sq_repeat*/
1238 (intargfunc)string_item, /*sq_item*/
1239 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001240 0, /*sq_ass_item*/
1241 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001242 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001243};
1244
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001245static PyMappingMethods string_as_mapping = {
1246 (inquiry)string_length,
1247 (binaryfunc)string_subscript,
1248 0,
1249};
1250
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001251static PyBufferProcs string_as_buffer = {
1252 (getreadbufferproc)string_buffer_getreadbuf,
1253 (getwritebufferproc)string_buffer_getwritebuf,
1254 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001255 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001256};
1257
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001258
1259
/* Strip modes; they double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by strip mode. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for a strip mode: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001268
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001269
1270static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001271split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001273 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001274 PyObject* item;
1275 PyObject *list = PyList_New(0);
1276
1277 if (list == NULL)
1278 return NULL;
1279
Guido van Rossum4c08d552000-03-10 22:55:18 +00001280 for (i = j = 0; i < len; ) {
1281 while (i < len && isspace(Py_CHARMASK(s[i])))
1282 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001283 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001284 while (i < len && !isspace(Py_CHARMASK(s[i])))
1285 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001287 if (maxsplit-- <= 0)
1288 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1290 if (item == NULL)
1291 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001292 err = PyList_Append(list, item);
1293 Py_DECREF(item);
1294 if (err < 0)
1295 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 while (i < len && isspace(Py_CHARMASK(s[i])))
1297 i++;
1298 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 }
1300 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301 if (j < len) {
1302 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1303 if (item == NULL)
1304 goto finally;
1305 err = PyList_Append(list, item);
1306 Py_DECREF(item);
1307 if (err < 0)
1308 goto finally;
1309 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 return list;
1311 finally:
1312 Py_DECREF(list);
1313 return NULL;
1314}
1315
1316
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001317PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001318"S.split([sep [,maxsplit]]) -> list of strings\n\
1319\n\
1320Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001322splits are done. If sep is not specified or is None, any\n\
1323whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324
1325static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001326string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327{
1328 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001329 int maxsplit = -1;
1330 const char *s = PyString_AS_STRING(self), *sub;
1331 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 if (maxsplit < 0)
1336 maxsplit = INT_MAX;
1337 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001339 if (PyString_Check(subobj)) {
1340 sub = PyString_AS_STRING(subobj);
1341 n = PyString_GET_SIZE(subobj);
1342 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001343#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 else if (PyUnicode_Check(subobj))
1345 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001346#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001347 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1348 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 if (n == 0) {
1350 PyErr_SetString(PyExc_ValueError, "empty separator");
1351 return NULL;
1352 }
1353
1354 list = PyList_New(0);
1355 if (list == NULL)
1356 return NULL;
1357
1358 i = j = 0;
1359 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001360 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361 if (maxsplit-- <= 0)
1362 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1364 if (item == NULL)
1365 goto fail;
1366 err = PyList_Append(list, item);
1367 Py_DECREF(item);
1368 if (err < 0)
1369 goto fail;
1370 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371 }
1372 else
1373 i++;
1374 }
1375 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1376 if (item == NULL)
1377 goto fail;
1378 err = PyList_Append(list, item);
1379 Py_DECREF(item);
1380 if (err < 0)
1381 goto fail;
1382
1383 return list;
1384
1385 fail:
1386 Py_DECREF(list);
1387 return NULL;
1388}
1389
1390
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001391PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001392"S.join(sequence) -> string\n\
1393\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001394Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001395sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396
1397static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001398string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399{
1400 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001401 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 char *p;
1404 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001405 size_t sz = 0;
1406 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001407 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408
Tim Peters19fe14e2001-01-19 03:03:47 +00001409 seq = PySequence_Fast(orig, "");
1410 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001411 if (PyErr_ExceptionMatches(PyExc_TypeError))
1412 PyErr_Format(PyExc_TypeError,
1413 "sequence expected, %.80s found",
1414 orig->ob_type->tp_name);
1415 return NULL;
1416 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001417
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001418 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001419 if (seqlen == 0) {
1420 Py_DECREF(seq);
1421 return PyString_FromString("");
1422 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001424 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001425 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1426 PyErr_Format(PyExc_TypeError,
1427 "sequence item 0: expected string,"
1428 " %.80s found",
1429 item->ob_type->tp_name);
1430 Py_DECREF(seq);
1431 return NULL;
1432 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001433 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001434 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001435 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001437
Tim Peters19fe14e2001-01-19 03:03:47 +00001438 /* There are at least two things to join. Do a pre-pass to figure out
1439 * the total amount of space we'll need (sz), see whether any argument
1440 * is absurd, and defer to the Unicode join if appropriate.
1441 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001442 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001443 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001444 item = PySequence_Fast_GET_ITEM(seq, i);
1445 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001446#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001447 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001448 /* Defer to Unicode join.
1449 * CAUTION: There's no gurantee that the
1450 * original sequence can be iterated over
1451 * again, so we must pass seq here.
1452 */
1453 PyObject *result;
1454 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001455 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001456 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001457 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001458#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001459 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001460 "sequence item %i: expected string,"
1461 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001462 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001463 Py_DECREF(seq);
1464 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001465 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001466 sz += PyString_GET_SIZE(item);
1467 if (i != 0)
1468 sz += seplen;
1469 if (sz < old_sz || sz > INT_MAX) {
1470 PyErr_SetString(PyExc_OverflowError,
1471 "join() is too long for a Python string");
1472 Py_DECREF(seq);
1473 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001475 }
1476
1477 /* Allocate result space. */
1478 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1479 if (res == NULL) {
1480 Py_DECREF(seq);
1481 return NULL;
1482 }
1483
1484 /* Catenate everything. */
1485 p = PyString_AS_STRING(res);
1486 for (i = 0; i < seqlen; ++i) {
1487 size_t n;
1488 item = PySequence_Fast_GET_ITEM(seq, i);
1489 n = PyString_GET_SIZE(item);
1490 memcpy(p, PyString_AS_STRING(item), n);
1491 p += n;
1492 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001493 memcpy(p, sep, seplen);
1494 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001495 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001497
Jeremy Hylton49048292000-07-11 03:28:17 +00001498 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500}
1501
Tim Peters52e155e2001-06-16 05:42:57 +00001502PyObject *
1503_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001504{
Tim Petersa7259592001-06-16 05:11:17 +00001505 assert(sep != NULL && PyString_Check(sep));
1506 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001507 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001508}
1509
/* Normalize a start/end index pair in place for a sequence of length len.
   A negative value has len added to it and is then floored at 0; end is
   additionally capped at len.  start is not capped at len. */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int bound;

    /* End first: cap at len, or count a negative value from the end. */
    bound = *end;
    if (bound > len)
        bound = len;
    else if (bound < 0)
        bound += len;
    *end = (bound < 0) ? 0 : bound;

    /* Then start: count a negative value from the end, floor at 0. */
    bound = *start;
    if (bound < 0)
        bound += len;
    *start = (bound < 0) ? 0 : bound;
}
1524
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525static long
Fred Drakeba096332000-07-09 07:04:36 +00001526string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001528 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001529 int len = PyString_GET_SIZE(self);
1530 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001531 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001533 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001534 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001535 return -2;
1536 if (PyString_Check(subobj)) {
1537 sub = PyString_AS_STRING(subobj);
1538 n = PyString_GET_SIZE(subobj);
1539 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001540#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001541 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001542 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001543#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001544 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001545 return -2;
1546
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001547 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548
Guido van Rossum4c08d552000-03-10 22:55:18 +00001549 if (dir > 0) {
1550 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001552 last -= n;
1553 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001554 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001555 return (long)i;
1556 }
1557 else {
1558 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001559
Guido van Rossum4c08d552000-03-10 22:55:18 +00001560 if (n == 0 && i <= last)
1561 return (long)last;
1562 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001563 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001564 return (long)j;
1565 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001566
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001567 return -1;
1568}
1569
1570
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001571PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001572"S.find(sub [,start [,end]]) -> int\n\
1573\n\
1574Return the lowest index in S where substring sub is found,\n\
1575such that sub is contained within s[start,end]. Optional\n\
1576arguments start and end are interpreted as in slice notation.\n\
1577\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001578Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001579
1580static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001581string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001582{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001583 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001584 if (result == -2)
1585 return NULL;
1586 return PyInt_FromLong(result);
1587}
1588
1589
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001590PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001591"S.index(sub [,start [,end]]) -> int\n\
1592\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001593Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001594
1595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001596string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001597{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001598 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001599 if (result == -2)
1600 return NULL;
1601 if (result == -1) {
1602 PyErr_SetString(PyExc_ValueError,
1603 "substring not found in string.index");
1604 return NULL;
1605 }
1606 return PyInt_FromLong(result);
1607}
1608
1609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001610PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001611"S.rfind(sub [,start [,end]]) -> int\n\
1612\n\
1613Return the highest index in S where substring sub is found,\n\
1614such that sub is contained within s[start,end]. Optional\n\
1615arguments start and end are interpreted as in slice notation.\n\
1616\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001617Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618
1619static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001620string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001621{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001622 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 if (result == -2)
1624 return NULL;
1625 return PyInt_FromLong(result);
1626}
1627
1628
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001629PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630"S.rindex(sub [,start [,end]]) -> int\n\
1631\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001632Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633
1634static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001635string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001637 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 if (result == -2)
1639 return NULL;
1640 if (result == -1) {
1641 PyErr_SetString(PyExc_ValueError,
1642 "substring not found in string.rindex");
1643 return NULL;
1644 }
1645 return PyInt_FromLong(result);
1646}
1647
1648
1649static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001650do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1651{
1652 char *s = PyString_AS_STRING(self);
1653 int len = PyString_GET_SIZE(self);
1654 char *sep = PyString_AS_STRING(sepobj);
1655 int seplen = PyString_GET_SIZE(sepobj);
1656 int i, j;
1657
1658 i = 0;
1659 if (striptype != RIGHTSTRIP) {
1660 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1661 i++;
1662 }
1663 }
1664
1665 j = len;
1666 if (striptype != LEFTSTRIP) {
1667 do {
1668 j--;
1669 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1670 j++;
1671 }
1672
1673 if (i == 0 && j == len && PyString_CheckExact(self)) {
1674 Py_INCREF(self);
1675 return (PyObject*)self;
1676 }
1677 else
1678 return PyString_FromStringAndSize(s+i, j-i);
1679}
1680
1681
1682static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001683do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684{
1685 char *s = PyString_AS_STRING(self);
1686 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001688 i = 0;
1689 if (striptype != RIGHTSTRIP) {
1690 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1691 i++;
1692 }
1693 }
1694
1695 j = len;
1696 if (striptype != LEFTSTRIP) {
1697 do {
1698 j--;
1699 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1700 j++;
1701 }
1702
Tim Peters8fa5dd02001-09-12 02:18:30 +00001703 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704 Py_INCREF(self);
1705 return (PyObject*)self;
1706 }
1707 else
1708 return PyString_FromStringAndSize(s+i, j-i);
1709}
1710
1711
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001712static PyObject *
1713do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1714{
1715 PyObject *sep = NULL;
1716
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001717 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001718 return NULL;
1719
1720 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001721 if (PyString_Check(sep))
1722 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001723#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001724 else if (PyUnicode_Check(sep)) {
1725 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1726 PyObject *res;
1727 if (uniself==NULL)
1728 return NULL;
1729 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1730 striptype, sep);
1731 Py_DECREF(uniself);
1732 return res;
1733 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001734#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001735 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001736 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001737#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001738 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001739#else
1740 "%s arg must be None or str",
1741#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001742 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001743 return NULL;
1744 }
1745 return do_xstrip(self, striptype, sep);
1746 }
1747
1748 return do_strip(self, striptype);
1749}
1750
1751
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001752PyDoc_STRVAR(strip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001753"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754\n\
1755Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001756whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001757If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001758If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759
1760static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001761string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001763 if (PyTuple_GET_SIZE(args) == 0)
1764 return do_strip(self, BOTHSTRIP); /* Common case */
1765 else
1766 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767}
1768
1769
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001770PyDoc_STRVAR(lstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001771"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001773Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001774If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776
1777static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001778string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001780 if (PyTuple_GET_SIZE(args) == 0)
1781 return do_strip(self, LEFTSTRIP); /* Common case */
1782 else
1783 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784}
1785
1786
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001787PyDoc_STRVAR(rstrip__doc__,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001788"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001790Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001791If sep is given and not None, remove characters in sep instead.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001792If sep is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793
1794static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001795string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001797 if (PyTuple_GET_SIZE(args) == 0)
1798 return do_strip(self, RIGHTSTRIP); /* Common case */
1799 else
1800 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801}
1802
1803
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001804PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805"S.lower() -> string\n\
1806\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001807Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808
1809static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001810string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811{
1812 char *s = PyString_AS_STRING(self), *s_new;
1813 int i, n = PyString_GET_SIZE(self);
1814 PyObject *new;
1815
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816 new = PyString_FromStringAndSize(NULL, n);
1817 if (new == NULL)
1818 return NULL;
1819 s_new = PyString_AsString(new);
1820 for (i = 0; i < n; i++) {
1821 int c = Py_CHARMASK(*s++);
1822 if (isupper(c)) {
1823 *s_new = tolower(c);
1824 } else
1825 *s_new = c;
1826 s_new++;
1827 }
1828 return new;
1829}
1830
1831
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001832PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001833"S.upper() -> string\n\
1834\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001835Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836
1837static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001838string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839{
1840 char *s = PyString_AS_STRING(self), *s_new;
1841 int i, n = PyString_GET_SIZE(self);
1842 PyObject *new;
1843
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001844 new = PyString_FromStringAndSize(NULL, n);
1845 if (new == NULL)
1846 return NULL;
1847 s_new = PyString_AsString(new);
1848 for (i = 0; i < n; i++) {
1849 int c = Py_CHARMASK(*s++);
1850 if (islower(c)) {
1851 *s_new = toupper(c);
1852 } else
1853 *s_new = c;
1854 s_new++;
1855 }
1856 return new;
1857}
1858
1859
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001860PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001861"S.title() -> string\n\
1862\n\
1863Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001864characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865
1866static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001867string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868{
1869 char *s = PyString_AS_STRING(self), *s_new;
1870 int i, n = PyString_GET_SIZE(self);
1871 int previous_is_cased = 0;
1872 PyObject *new;
1873
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 new = PyString_FromStringAndSize(NULL, n);
1875 if (new == NULL)
1876 return NULL;
1877 s_new = PyString_AsString(new);
1878 for (i = 0; i < n; i++) {
1879 int c = Py_CHARMASK(*s++);
1880 if (islower(c)) {
1881 if (!previous_is_cased)
1882 c = toupper(c);
1883 previous_is_cased = 1;
1884 } else if (isupper(c)) {
1885 if (previous_is_cased)
1886 c = tolower(c);
1887 previous_is_cased = 1;
1888 } else
1889 previous_is_cased = 0;
1890 *s_new++ = c;
1891 }
1892 return new;
1893}
1894
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001895PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896"S.capitalize() -> string\n\
1897\n\
1898Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001899capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900
1901static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001902string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
1904 char *s = PyString_AS_STRING(self), *s_new;
1905 int i, n = PyString_GET_SIZE(self);
1906 PyObject *new;
1907
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 new = PyString_FromStringAndSize(NULL, n);
1909 if (new == NULL)
1910 return NULL;
1911 s_new = PyString_AsString(new);
1912 if (0 < n) {
1913 int c = Py_CHARMASK(*s++);
1914 if (islower(c))
1915 *s_new = toupper(c);
1916 else
1917 *s_new = c;
1918 s_new++;
1919 }
1920 for (i = 1; i < n; i++) {
1921 int c = Py_CHARMASK(*s++);
1922 if (isupper(c))
1923 *s_new = tolower(c);
1924 else
1925 *s_new = c;
1926 s_new++;
1927 }
1928 return new;
1929}
1930
1931
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001932PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933"S.count(sub[, start[, end]]) -> int\n\
1934\n\
1935Return the number of occurrences of substring sub in string\n\
1936S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001937interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001938
1939static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001940string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001942 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943 int len = PyString_GET_SIZE(self), n;
1944 int i = 0, last = INT_MAX;
1945 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001946 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947
Guido van Rossumc6821402000-05-08 14:08:05 +00001948 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1949 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001951
Guido van Rossum4c08d552000-03-10 22:55:18 +00001952 if (PyString_Check(subobj)) {
1953 sub = PyString_AS_STRING(subobj);
1954 n = PyString_GET_SIZE(subobj);
1955 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001956#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001957 else if (PyUnicode_Check(subobj)) {
1958 int count;
1959 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1960 if (count == -1)
1961 return NULL;
1962 else
1963 return PyInt_FromLong((long) count);
1964 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001965#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001966 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1967 return NULL;
1968
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001969 string_adjust_indices(&i, &last, len);
1970
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971 m = last + 1 - n;
1972 if (n == 0)
1973 return PyInt_FromLong((long) (m-i));
1974
1975 r = 0;
1976 while (i < m) {
1977 if (!memcmp(s+i, sub, n)) {
1978 r++;
1979 i += n;
1980 } else {
1981 i++;
1982 }
1983 }
1984 return PyInt_FromLong((long) r);
1985}
1986
1987
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001988PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989"S.swapcase() -> string\n\
1990\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001991Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001992converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993
1994static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001995string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996{
1997 char *s = PyString_AS_STRING(self), *s_new;
1998 int i, n = PyString_GET_SIZE(self);
1999 PyObject *new;
2000
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001 new = PyString_FromStringAndSize(NULL, n);
2002 if (new == NULL)
2003 return NULL;
2004 s_new = PyString_AsString(new);
2005 for (i = 0; i < n; i++) {
2006 int c = Py_CHARMASK(*s++);
2007 if (islower(c)) {
2008 *s_new = toupper(c);
2009 }
2010 else if (isupper(c)) {
2011 *s_new = tolower(c);
2012 }
2013 else
2014 *s_new = c;
2015 s_new++;
2016 }
2017 return new;
2018}
2019
2020
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002021PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002022"S.translate(table [,deletechars]) -> string\n\
2023\n\
2024Return a copy of the string S, where all characters occurring\n\
2025in the optional argument deletechars are removed, and the\n\
2026remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002027translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028
2029static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002030string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002032 register char *input, *output;
2033 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034 register int i, c, changed = 0;
2035 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002036 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037 int inlen, tablen, dellen = 0;
2038 PyObject *result;
2039 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002040 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041
Guido van Rossum4c08d552000-03-10 22:55:18 +00002042 if (!PyArg_ParseTuple(args, "O|O:translate",
2043 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002045
2046 if (PyString_Check(tableobj)) {
2047 table1 = PyString_AS_STRING(tableobj);
2048 tablen = PyString_GET_SIZE(tableobj);
2049 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002050#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002051 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002052 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002053 parameter; instead a mapping to None will cause characters
2054 to be deleted. */
2055 if (delobj != NULL) {
2056 PyErr_SetString(PyExc_TypeError,
2057 "deletions are implemented differently for unicode");
2058 return NULL;
2059 }
2060 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2061 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002062#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065
2066 if (delobj != NULL) {
2067 if (PyString_Check(delobj)) {
2068 del_table = PyString_AS_STRING(delobj);
2069 dellen = PyString_GET_SIZE(delobj);
2070 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002071#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002072 else if (PyUnicode_Check(delobj)) {
2073 PyErr_SetString(PyExc_TypeError,
2074 "deletions are implemented differently for unicode");
2075 return NULL;
2076 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002077#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2079 return NULL;
2080
2081 if (tablen != 256) {
2082 PyErr_SetString(PyExc_ValueError,
2083 "translation table must be 256 characters long");
2084 return NULL;
2085 }
2086 }
2087 else {
2088 del_table = NULL;
2089 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090 }
2091
2092 table = table1;
2093 inlen = PyString_Size(input_obj);
2094 result = PyString_FromStringAndSize((char *)NULL, inlen);
2095 if (result == NULL)
2096 return NULL;
2097 output_start = output = PyString_AsString(result);
2098 input = PyString_AsString(input_obj);
2099
2100 if (dellen == 0) {
2101 /* If no deletions are required, use faster code */
2102 for (i = inlen; --i >= 0; ) {
2103 c = Py_CHARMASK(*input++);
2104 if (Py_CHARMASK((*output++ = table[c])) != c)
2105 changed = 1;
2106 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002107 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108 return result;
2109 Py_DECREF(result);
2110 Py_INCREF(input_obj);
2111 return input_obj;
2112 }
2113
2114 for (i = 0; i < 256; i++)
2115 trans_table[i] = Py_CHARMASK(table[i]);
2116
2117 for (i = 0; i < dellen; i++)
2118 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2119
2120 for (i = inlen; --i >= 0; ) {
2121 c = Py_CHARMASK(*input++);
2122 if (trans_table[c] != -1)
2123 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2124 continue;
2125 changed = 1;
2126 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002127 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 Py_DECREF(result);
2129 Py_INCREF(input_obj);
2130 return input_obj;
2131 }
2132 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002133 if (inlen > 0)
2134 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135 return result;
2136}
2137
2138
2139/* What follows is used for implementing replace(). Perry Stoll. */
2140
/*
  mymemfind

  A strstr() work-alike for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM where the match starts, or
  -1 if there is none.  A pattern longer than MEM can never match, so
  -1 is returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* a match cannot start within the final pat_len-1 bytes */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] &&
            memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
2166
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right with mymemfind().  For example mem=1111 with pat=11 gives 2, and
  mem=11111 with pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        int skip = mymemfind(mem, len, pat, pat_len);

        if (skip == -1)
            break;
        /* step over the unmatched prefix and the match itself */
        skip += pat_len;
        mem += skip;
        len -= skip;
    }
    return hits;
}
2190
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a negative
   COUNT means "no limit".  An empty PAT matches before every byte of STR
   and once more at the end.

   If STR is empty, if PAT and SUB are both empty, if PAT is longer than
   STR, or if there is nothing to replace, the original string is
   returned.  Otherwise a new buffer is allocated here with
   PyMem_MALLOC() and returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       does not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
    if (count >= 0 && nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* The output grows only when SUB is longer than PAT.  Refuse a
       result whose length would overflow an int: an overflowed new_len
       would lead to an undersized buffer being written past its end. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        if (pat_len > 0) {
            for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                if (offset == -1)
                    break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset);
                str += offset + pat_len;
                len -= offset + pat_len;

                /* copy substitute into the output string */
                new_s += offset;
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
            }
            /* copy any remaining values into output string */
            if (len > 0)
                memcpy(new_s, str, len);
        }
        else {
            /* empty pattern: emit SUB before each input byte until the
               replacement budget runs out, then copy the rest */
            for (;;++str, --len) {
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
                if (--nfound <= 0) {
                    memcpy(new_s, str, len);
                    break;
                }
                *new_s++ = *str;
            }
        }
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2288
2289
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002290PyDoc_STRVAR(replace__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291"S.replace (old, new[, maxsplit]) -> string\n\
2292\n\
2293Return a copy of string S with all occurrences of substring\n\
2294old replaced by new. If the optional argument maxsplit is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002295given, only the first maxsplit occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002296
2297static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002298string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300 const char *str = PyString_AS_STRING(self), *sub, *repl;
2301 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002302 const int len = PyString_GET_SIZE(self);
2303 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002304 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 if (!PyArg_ParseTuple(args, "OO|i:replace",
2309 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311
2312 if (PyString_Check(subobj)) {
2313 sub = PyString_AS_STRING(subobj);
2314 sub_len = PyString_GET_SIZE(subobj);
2315 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002316#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002318 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002319 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002320#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002321 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2322 return NULL;
2323
2324 if (PyString_Check(replobj)) {
2325 repl = PyString_AS_STRING(replobj);
2326 repl_len = PyString_GET_SIZE(replobj);
2327 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002328#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002329 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002330 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002332#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2334 return NULL;
2335
Guido van Rossum4c08d552000-03-10 22:55:18 +00002336 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 if (new_s == NULL) {
2338 PyErr_NoMemory();
2339 return NULL;
2340 }
2341 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002342 if (PyString_CheckExact(self)) {
2343 /* we're returning another reference to self */
2344 new = (PyObject*)self;
2345 Py_INCREF(new);
2346 }
2347 else {
2348 new = PyString_FromStringAndSize(str, len);
2349 if (new == NULL)
2350 return NULL;
2351 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352 }
2353 else {
2354 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002355 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356 }
2357 return new;
2358}
2359
2360
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002361PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002362"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002363\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002364Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002366comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367
2368static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002369string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374 int plen;
2375 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002376 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002377 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378
Guido van Rossumc6821402000-05-08 14:08:05 +00002379 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2380 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381 return NULL;
2382 if (PyString_Check(subobj)) {
2383 prefix = PyString_AS_STRING(subobj);
2384 plen = PyString_GET_SIZE(subobj);
2385 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002386#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002387 else if (PyUnicode_Check(subobj)) {
2388 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002389 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002390 subobj, start, end, -1);
2391 if (rc == -1)
2392 return NULL;
2393 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002394 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002395 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002396#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002397 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398 return NULL;
2399
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002400 string_adjust_indices(&start, &end, len);
2401
2402 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002403 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002405 if (end-start >= plen)
2406 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2407 else
2408 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409}
2410
2411
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002412PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002413"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002414\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002415Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416optional start, test S beginning at that position. With optional end, stop\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002417comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418
2419static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002420string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002421{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002422 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002424 const char* suffix;
2425 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002426 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002427 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429
Guido van Rossumc6821402000-05-08 14:08:05 +00002430 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2431 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432 return NULL;
2433 if (PyString_Check(subobj)) {
2434 suffix = PyString_AS_STRING(subobj);
2435 slen = PyString_GET_SIZE(subobj);
2436 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002437#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002438 else if (PyUnicode_Check(subobj)) {
2439 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002440 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002441 subobj, start, end, +1);
2442 if (rc == -1)
2443 return NULL;
2444 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002445 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002446 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002447#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449 return NULL;
2450
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002451 string_adjust_indices(&start, &end, len);
2452
2453 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002454 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002456 if (end-slen > start)
2457 start = end - slen;
2458 if (end-start >= slen)
2459 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2460 else
2461 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462}
2463
2464
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002465PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002466"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002467\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002468Encodes S using the codec registered for encoding. encoding defaults\n\
2469to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002470handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002471a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002472
2473static PyObject *
2474string_encode(PyStringObject *self, PyObject *args)
2475{
2476 char *encoding = NULL;
2477 char *errors = NULL;
2478 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2479 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002480 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2481}
2482
2483
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002484PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002485"S.decode([encoding[,errors]]) -> object\n\
2486\n\
2487Decodes S using the codec registered for encoding. encoding defaults\n\
2488to the default encoding. errors may be given to set a different error\n\
2489handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002490a ValueError. Other possible values are 'ignore' and 'replace'.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002491
2492static PyObject *
2493string_decode(PyStringObject *self, PyObject *args)
2494{
2495 char *encoding = NULL;
2496 char *errors = NULL;
2497 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2498 return NULL;
2499 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002500}
2501
2502
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002503PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002504"S.expandtabs([tabsize]) -> string\n\
2505\n\
2506Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002507If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002508
2509static PyObject*
2510string_expandtabs(PyStringObject *self, PyObject *args)
2511{
2512 const char *e, *p;
2513 char *q;
2514 int i, j;
2515 PyObject *u;
2516 int tabsize = 8;
2517
2518 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2519 return NULL;
2520
Thomas Wouters7e474022000-07-16 12:04:32 +00002521 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522 i = j = 0;
2523 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2524 for (p = PyString_AS_STRING(self); p < e; p++)
2525 if (*p == '\t') {
2526 if (tabsize > 0)
2527 j += tabsize - (j % tabsize);
2528 }
2529 else {
2530 j++;
2531 if (*p == '\n' || *p == '\r') {
2532 i += j;
2533 j = 0;
2534 }
2535 }
2536
2537 /* Second pass: create output string and fill it */
2538 u = PyString_FromStringAndSize(NULL, i + j);
2539 if (!u)
2540 return NULL;
2541
2542 j = 0;
2543 q = PyString_AS_STRING(u);
2544
2545 for (p = PyString_AS_STRING(self); p < e; p++)
2546 if (*p == '\t') {
2547 if (tabsize > 0) {
2548 i = tabsize - (j % tabsize);
2549 j += i;
2550 while (i--)
2551 *q++ = ' ';
2552 }
2553 }
2554 else {
2555 j++;
2556 *q++ = *p;
2557 if (*p == '\n' || *p == '\r')
2558 j = 0;
2559 }
2560
2561 return u;
2562}
2563
Tim Peters8fa5dd02001-09-12 02:18:30 +00002564static PyObject *
2565pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002566{
2567 PyObject *u;
2568
2569 if (left < 0)
2570 left = 0;
2571 if (right < 0)
2572 right = 0;
2573
Tim Peters8fa5dd02001-09-12 02:18:30 +00002574 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002575 Py_INCREF(self);
2576 return (PyObject *)self;
2577 }
2578
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002579 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580 left + PyString_GET_SIZE(self) + right);
2581 if (u) {
2582 if (left)
2583 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002584 memcpy(PyString_AS_STRING(u) + left,
2585 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586 PyString_GET_SIZE(self));
2587 if (right)
2588 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2589 fill, right);
2590 }
2591
2592 return u;
2593}
2594
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002595PyDoc_STRVAR(ljust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002596"S.ljust(width) -> string\n"
2597"\n"
2598"Return S left justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002599"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002600
2601static PyObject *
2602string_ljust(PyStringObject *self, PyObject *args)
2603{
2604 int width;
2605 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2606 return NULL;
2607
Tim Peters8fa5dd02001-09-12 02:18:30 +00002608 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002609 Py_INCREF(self);
2610 return (PyObject*) self;
2611 }
2612
2613 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2614}
2615
2616
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002617PyDoc_STRVAR(rjust__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002618"S.rjust(width) -> string\n"
2619"\n"
2620"Return S right justified in a string of length width. Padding is\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002621"done using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002622
2623static PyObject *
2624string_rjust(PyStringObject *self, PyObject *args)
2625{
2626 int width;
2627 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2628 return NULL;
2629
Tim Peters8fa5dd02001-09-12 02:18:30 +00002630 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002631 Py_INCREF(self);
2632 return (PyObject*) self;
2633 }
2634
2635 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2636}
2637
2638
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002639PyDoc_STRVAR(center__doc__,
Tim Peters8fa5dd02001-09-12 02:18:30 +00002640"S.center(width) -> string\n"
2641"\n"
2642"Return S centered in a string of length width. Padding is done\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002643"using spaces.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002644
2645static PyObject *
2646string_center(PyStringObject *self, PyObject *args)
2647{
2648 int marg, left;
2649 int width;
2650
2651 if (!PyArg_ParseTuple(args, "i:center", &width))
2652 return NULL;
2653
Tim Peters8fa5dd02001-09-12 02:18:30 +00002654 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655 Py_INCREF(self);
2656 return (PyObject*) self;
2657 }
2658
2659 marg = width - PyString_GET_SIZE(self);
2660 left = marg / 2 + (marg & width & 1);
2661
2662 return pad(self, left, marg - left, ' ');
2663}
2664
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002665PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002666"S.zfill(width) -> string\n"
2667"\n"
2668"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002669"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002670
2671static PyObject *
2672string_zfill(PyStringObject *self, PyObject *args)
2673{
2674 int fill;
2675 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002676 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002677
2678 int width;
2679 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2680 return NULL;
2681
2682 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002683 if (PyString_CheckExact(self)) {
2684 Py_INCREF(self);
2685 return (PyObject*) self;
2686 }
2687 else
2688 return PyString_FromStringAndSize(
2689 PyString_AS_STRING(self),
2690 PyString_GET_SIZE(self)
2691 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002692 }
2693
2694 fill = width - PyString_GET_SIZE(self);
2695
2696 s = pad(self, fill, 0, '0');
2697
2698 if (s == NULL)
2699 return NULL;
2700
2701 p = PyString_AS_STRING(s);
2702 if (p[fill] == '+' || p[fill] == '-') {
2703 /* move sign to beginning of string */
2704 p[0] = p[fill];
2705 p[fill] = '0';
2706 }
2707
2708 return (PyObject*) s;
2709}
2710
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002711PyDoc_STRVAR(isspace__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002712"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002713"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002714"Return True if there are only whitespace characters in S,\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002715"False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002716
2717static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002718string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002719{
Fred Drakeba096332000-07-09 07:04:36 +00002720 register const unsigned char *p
2721 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002722 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002723
Guido van Rossum4c08d552000-03-10 22:55:18 +00002724 /* Shortcut for single character strings */
2725 if (PyString_GET_SIZE(self) == 1 &&
2726 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002727 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002728
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002729 /* Special case for empty strings */
2730 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002731 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002732
Guido van Rossum4c08d552000-03-10 22:55:18 +00002733 e = p + PyString_GET_SIZE(self);
2734 for (; p < e; p++) {
2735 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002736 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002737 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002738 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002739}
2740
2741
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002742PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002743"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002744\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002745Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002746and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002747
2748static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002749string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002750{
Fred Drakeba096332000-07-09 07:04:36 +00002751 register const unsigned char *p
2752 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002753 register const unsigned char *e;
2754
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002755 /* Shortcut for single character strings */
2756 if (PyString_GET_SIZE(self) == 1 &&
2757 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002758 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002759
2760 /* Special case for empty strings */
2761 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002762 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002763
2764 e = p + PyString_GET_SIZE(self);
2765 for (; p < e; p++) {
2766 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002767 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002768 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002769 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002770}
2771
2772
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002773PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002774"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002775\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002776Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002777and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002778
2779static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002780string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002781{
Fred Drakeba096332000-07-09 07:04:36 +00002782 register const unsigned char *p
2783 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002784 register const unsigned char *e;
2785
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002786 /* Shortcut for single character strings */
2787 if (PyString_GET_SIZE(self) == 1 &&
2788 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002789 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002790
2791 /* Special case for empty strings */
2792 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002793 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002794
2795 e = p + PyString_GET_SIZE(self);
2796 for (; p < e; p++) {
2797 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002798 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002799 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002800 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002801}
2802
2803
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002804PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002805"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002806\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002807Return True if there are only digit characters in S,\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002808False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002809
2810static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002811string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002812{
Fred Drakeba096332000-07-09 07:04:36 +00002813 register const unsigned char *p
2814 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002815 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002816
Guido van Rossum4c08d552000-03-10 22:55:18 +00002817 /* Shortcut for single character strings */
2818 if (PyString_GET_SIZE(self) == 1 &&
2819 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002820 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002821
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002822 /* Special case for empty strings */
2823 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002824 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002825
Guido van Rossum4c08d552000-03-10 22:55:18 +00002826 e = p + PyString_GET_SIZE(self);
2827 for (; p < e; p++) {
2828 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002829 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002830 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002831 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002832}
2833
2834
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002835PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002836"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002837\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002838Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002839at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002840
2841static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002842string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002843{
Fred Drakeba096332000-07-09 07:04:36 +00002844 register const unsigned char *p
2845 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002846 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002847 int cased;
2848
Guido van Rossum4c08d552000-03-10 22:55:18 +00002849 /* Shortcut for single character strings */
2850 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002851 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002852
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002853 /* Special case for empty strings */
2854 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002855 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002856
Guido van Rossum4c08d552000-03-10 22:55:18 +00002857 e = p + PyString_GET_SIZE(self);
2858 cased = 0;
2859 for (; p < e; p++) {
2860 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002861 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002862 else if (!cased && islower(*p))
2863 cased = 1;
2864 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002865 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002866}
2867
2868
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002869PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002870"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002872Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002873at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002874
2875static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002876string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877{
Fred Drakeba096332000-07-09 07:04:36 +00002878 register const unsigned char *p
2879 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002880 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881 int cased;
2882
Guido van Rossum4c08d552000-03-10 22:55:18 +00002883 /* Shortcut for single character strings */
2884 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002885 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002886
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002887 /* Special case for empty strings */
2888 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002889 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002890
Guido van Rossum4c08d552000-03-10 22:55:18 +00002891 e = p + PyString_GET_SIZE(self);
2892 cased = 0;
2893 for (; p < e; p++) {
2894 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002895 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002896 else if (!cased && isupper(*p))
2897 cased = 1;
2898 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002899 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900}
2901
2902
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002903PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002904"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002905\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002906Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002907may only follow uncased characters and lowercase characters only cased\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002908ones. Return False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002909
2910static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002911string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002912{
Fred Drakeba096332000-07-09 07:04:36 +00002913 register const unsigned char *p
2914 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002915 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002916 int cased, previous_is_cased;
2917
Guido van Rossum4c08d552000-03-10 22:55:18 +00002918 /* Shortcut for single character strings */
2919 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002920 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002921
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002922 /* Special case for empty strings */
2923 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002924 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002925
Guido van Rossum4c08d552000-03-10 22:55:18 +00002926 e = p + PyString_GET_SIZE(self);
2927 cased = 0;
2928 previous_is_cased = 0;
2929 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002930 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002931
2932 if (isupper(ch)) {
2933 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002934 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002935 previous_is_cased = 1;
2936 cased = 1;
2937 }
2938 else if (islower(ch)) {
2939 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002940 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002941 previous_is_cased = 1;
2942 cased = 1;
2943 }
2944 else
2945 previous_is_cased = 0;
2946 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002947 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002948}
2949
2950
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002951PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00002952"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002953\n\
2954Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002955Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002956is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002957
2958#define SPLIT_APPEND(data, left, right) \
2959 str = PyString_FromStringAndSize(data + left, right - left); \
2960 if (!str) \
2961 goto onError; \
2962 if (PyList_Append(list, str)) { \
2963 Py_DECREF(str); \
2964 goto onError; \
2965 } \
2966 else \
2967 Py_DECREF(str);
2968
2969static PyObject*
2970string_splitlines(PyStringObject *self, PyObject *args)
2971{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002972 register int i;
2973 register int j;
2974 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002975 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002976 PyObject *list;
2977 PyObject *str;
2978 char *data;
2979
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002980 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002981 return NULL;
2982
2983 data = PyString_AS_STRING(self);
2984 len = PyString_GET_SIZE(self);
2985
Guido van Rossum4c08d552000-03-10 22:55:18 +00002986 list = PyList_New(0);
2987 if (!list)
2988 goto onError;
2989
2990 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002991 int eol;
2992
Guido van Rossum4c08d552000-03-10 22:55:18 +00002993 /* Find a line and append it */
2994 while (i < len && data[i] != '\n' && data[i] != '\r')
2995 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002996
2997 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002998 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002999 if (i < len) {
3000 if (data[i] == '\r' && i + 1 < len &&
3001 data[i+1] == '\n')
3002 i += 2;
3003 else
3004 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003005 if (keepends)
3006 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003007 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003008 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003009 j = i;
3010 }
3011 if (j < len) {
3012 SPLIT_APPEND(data, j, len);
3013 }
3014
3015 return list;
3016
3017 onError:
3018 Py_DECREF(list);
3019 return NULL;
3020}
3021
3022#undef SPLIT_APPEND
3023
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003024
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003025static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003026string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003027 /* Counterparts of the obsolete stropmodule functions; except
3028 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003029 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3030 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3031 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3032 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003033 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3034 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3035 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3036 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3037 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3038 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3039 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003040 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3041 capitalize__doc__},
3042 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3043 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3044 endswith__doc__},
3045 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3046 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3047 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3048 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3049 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3050 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3051 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3052 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3053 startswith__doc__},
3054 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3055 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3056 swapcase__doc__},
3057 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3058 translate__doc__},
3059 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3060 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3061 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3062 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3063 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3064 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3065 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3066 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3067 expandtabs__doc__},
3068 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3069 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003070 {NULL, NULL} /* sentinel */
3071};
3072
Jeremy Hylton938ace62002-07-17 16:30:39 +00003073static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003074str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3075
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003076static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003077string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003078{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003079 PyObject *x = NULL;
3080 static char *kwlist[] = {"object", 0};
3081
Guido van Rossumae960af2001-08-30 03:11:59 +00003082 if (type != &PyString_Type)
3083 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003084 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3085 return NULL;
3086 if (x == NULL)
3087 return PyString_FromString("");
3088 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003089}
3090
Guido van Rossumae960af2001-08-30 03:11:59 +00003091static PyObject *
3092str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3093{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003094 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003095 int n;
3096
3097 assert(PyType_IsSubtype(type, &PyString_Type));
3098 tmp = string_new(&PyString_Type, args, kwds);
3099 if (tmp == NULL)
3100 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003101 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003102 n = PyString_GET_SIZE(tmp);
3103 pnew = type->tp_alloc(type, n);
3104 if (pnew != NULL) {
3105 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003106 ((PyStringObject *)pnew)->ob_shash =
3107 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003108 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003109 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003110 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003111 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003112}
3113
Guido van Rossumcacfc072002-05-24 19:01:59 +00003114static PyObject *
3115basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3116{
3117 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003118 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003119 return NULL;
3120}
3121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003122PyDoc_STRVAR(basestring_doc,
3123"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003124
3125PyTypeObject PyBaseString_Type = {
3126 PyObject_HEAD_INIT(&PyType_Type)
3127 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003128 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003129 0,
3130 0,
3131 0, /* tp_dealloc */
3132 0, /* tp_print */
3133 0, /* tp_getattr */
3134 0, /* tp_setattr */
3135 0, /* tp_compare */
3136 0, /* tp_repr */
3137 0, /* tp_as_number */
3138 0, /* tp_as_sequence */
3139 0, /* tp_as_mapping */
3140 0, /* tp_hash */
3141 0, /* tp_call */
3142 0, /* tp_str */
3143 0, /* tp_getattro */
3144 0, /* tp_setattro */
3145 0, /* tp_as_buffer */
3146 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3147 basestring_doc, /* tp_doc */
3148 0, /* tp_traverse */
3149 0, /* tp_clear */
3150 0, /* tp_richcompare */
3151 0, /* tp_weaklistoffset */
3152 0, /* tp_iter */
3153 0, /* tp_iternext */
3154 0, /* tp_methods */
3155 0, /* tp_members */
3156 0, /* tp_getset */
3157 &PyBaseObject_Type, /* tp_base */
3158 0, /* tp_dict */
3159 0, /* tp_descr_get */
3160 0, /* tp_descr_set */
3161 0, /* tp_dictoffset */
3162 0, /* tp_init */
3163 0, /* tp_alloc */
3164 basestring_new, /* tp_new */
3165 0, /* tp_free */
3166};
3167
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003168PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003169"str(object) -> string\n\
3170\n\
3171Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003172If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003173
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003174PyTypeObject PyString_Type = {
3175 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003176 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003177 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003178 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003179 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003180 (destructor)string_dealloc, /* tp_dealloc */
3181 (printfunc)string_print, /* tp_print */
3182 0, /* tp_getattr */
3183 0, /* tp_setattr */
3184 0, /* tp_compare */
3185 (reprfunc)string_repr, /* tp_repr */
3186 0, /* tp_as_number */
3187 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003188 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003189 (hashfunc)string_hash, /* tp_hash */
3190 0, /* tp_call */
3191 (reprfunc)string_str, /* tp_str */
3192 PyObject_GenericGetAttr, /* tp_getattro */
3193 0, /* tp_setattro */
3194 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00003195 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003196 string_doc, /* tp_doc */
3197 0, /* tp_traverse */
3198 0, /* tp_clear */
3199 (richcmpfunc)string_richcompare, /* tp_richcompare */
3200 0, /* tp_weaklistoffset */
3201 0, /* tp_iter */
3202 0, /* tp_iternext */
3203 string_methods, /* tp_methods */
3204 0, /* tp_members */
3205 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003206 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003207 0, /* tp_dict */
3208 0, /* tp_descr_get */
3209 0, /* tp_descr_set */
3210 0, /* tp_dictoffset */
3211 0, /* tp_init */
3212 0, /* tp_alloc */
3213 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003214 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003215};
3216
3217void
Fred Drakeba096332000-07-09 07:04:36 +00003218PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003219{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003220 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003221 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003222 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003223 if (w == NULL || !PyString_Check(*pv)) {
3224 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003225 *pv = NULL;
3226 return;
3227 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003228 v = string_concat((PyStringObject *) *pv, w);
3229 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003230 *pv = v;
3231}
3232
Guido van Rossum013142a1994-08-30 08:19:36 +00003233void
Fred Drakeba096332000-07-09 07:04:36 +00003234PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003235{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003236 PyString_Concat(pv, w);
3237 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003238}
3239
3240
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003241/* The following function breaks the notion that strings are immutable:
3242 it changes the size of a string. We get away with this only if there
3243 is only one module referencing the object. You can also think of it
3244 as creating a new string object and destroying the old one, only
3245 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003246 already be known to some other part of the code...
3247 Note that if there's not enough memory to resize the string, the original
3248 string object at *pv is deallocated, *pv is set to NULL, an "out of
3249 memory" exception is set, and -1 is returned. Else (on success) 0 is
3250 returned, and the value in *pv may or may not be the same as on input.
3251 As always, an extra byte is allocated for a trailing \0 byte (newsize
3252 does *not* include that), and a trailing \0 byte is stored.
3253*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003254
3255int
Fred Drakeba096332000-07-09 07:04:36 +00003256_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003257{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003258 register PyObject *v;
3259 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003260 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003261 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003262 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003263 Py_DECREF(v);
3264 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003265 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003266 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003267 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003268 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003269 _Py_ForgetReference(v);
3270 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003271 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003272 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003273 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003274 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003275 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003276 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003277 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003278 _Py_NewReference(*pv);
3279 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003280 sv->ob_size = newsize;
3281 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003282 return 0;
3283}
Guido van Rossume5372401993-03-16 12:15:04 +00003284
3285/* Helpers for formatstring */
3286
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003287static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003288getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003289{
3290 int argidx = *p_argidx;
3291 if (argidx < arglen) {
3292 (*p_argidx)++;
3293 if (arglen < 0)
3294 return args;
3295 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003296 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003297 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003298 PyErr_SetString(PyExc_TypeError,
3299 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003300 return NULL;
3301}
3302
Tim Peters38fd5b62000-09-21 05:43:11 +00003303/* Format codes
3304 * F_LJUST '-'
3305 * F_SIGN '+'
3306 * F_BLANK ' '
3307 * F_ALT '#'
3308 * F_ZERO '0'
3309 */
Guido van Rossume5372401993-03-16 12:15:04 +00003310#define F_LJUST (1<<0)
3311#define F_SIGN (1<<1)
3312#define F_BLANK (1<<2)
3313#define F_ALT (1<<3)
3314#define F_ZERO (1<<4)
3315
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003316static int
Fred Drakeba096332000-07-09 07:04:36 +00003317formatfloat(char *buf, size_t buflen, int flags,
3318 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003319{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003320 /* fmt = '%#.' + `prec` + `type`
3321 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003322 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003323 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003324 x = PyFloat_AsDouble(v);
3325 if (x == -1.0 && PyErr_Occurred()) {
3326 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003327 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003328 }
Guido van Rossume5372401993-03-16 12:15:04 +00003329 if (prec < 0)
3330 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003331 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3332 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003333 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3334 (flags&F_ALT) ? "#" : "",
3335 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003336 /* worst case length calc to ensure no buffer overrun:
3337 fmt = %#.<prec>g
3338 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003339 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003340 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3341 If prec=0 the effective precision is 1 (the leading digit is
3342 always given), therefore increase by one to 10+prec. */
3343 if (buflen <= (size_t)10 + (size_t)prec) {
3344 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003345 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003346 return -1;
3347 }
Tim Peters885d4572001-11-28 20:27:42 +00003348 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003349 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003350}
3351
Tim Peters38fd5b62000-09-21 05:43:11 +00003352/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3353 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3354 * Python's regular ints.
3355 * Return value: a new PyString*, or NULL if error.
3356 * . *pbuf is set to point into it,
3357 * *plen set to the # of chars following that.
3358 * Caller must decref it when done using pbuf.
3359 * The string starting at *pbuf is of the form
3360 * "-"? ("0x" | "0X")? digit+
3361 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003362 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003363 * There will be at least prec digits, zero-filled on the left if
3364 * necessary to get that many.
3365 * val object to be converted
3366 * flags bitmask of format flags; only F_ALT is looked at
3367 * prec minimum number of digits; 0-fill on left if needed
3368 * type a character in [duoxX]; u acts the same as d
3369 *
3370 * CAUTION: o, x and X conversions on regular ints can never
3371 * produce a '-' sign, but can for Python's unbounded ints.
3372 */
3373PyObject*
3374_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3375 char **pbuf, int *plen)
3376{
3377 PyObject *result = NULL;
3378 char *buf;
3379 int i;
3380 int sign; /* 1 if '-', else 0 */
3381 int len; /* number of characters */
3382 int numdigits; /* len == numnondigits + numdigits */
3383 int numnondigits = 0;
3384
3385 switch (type) {
3386 case 'd':
3387 case 'u':
3388 result = val->ob_type->tp_str(val);
3389 break;
3390 case 'o':
3391 result = val->ob_type->tp_as_number->nb_oct(val);
3392 break;
3393 case 'x':
3394 case 'X':
3395 numnondigits = 2;
3396 result = val->ob_type->tp_as_number->nb_hex(val);
3397 break;
3398 default:
3399 assert(!"'type' not in [duoxX]");
3400 }
3401 if (!result)
3402 return NULL;
3403
3404 /* To modify the string in-place, there can only be one reference. */
3405 if (result->ob_refcnt != 1) {
3406 PyErr_BadInternalCall();
3407 return NULL;
3408 }
3409 buf = PyString_AsString(result);
3410 len = PyString_Size(result);
3411 if (buf[len-1] == 'L') {
3412 --len;
3413 buf[len] = '\0';
3414 }
3415 sign = buf[0] == '-';
3416 numnondigits += sign;
3417 numdigits = len - numnondigits;
3418 assert(numdigits > 0);
3419
Tim Petersfff53252001-04-12 18:38:48 +00003420 /* Get rid of base marker unless F_ALT */
3421 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003422 /* Need to skip 0x, 0X or 0. */
3423 int skipped = 0;
3424 switch (type) {
3425 case 'o':
3426 assert(buf[sign] == '0');
3427 /* If 0 is only digit, leave it alone. */
3428 if (numdigits > 1) {
3429 skipped = 1;
3430 --numdigits;
3431 }
3432 break;
3433 case 'x':
3434 case 'X':
3435 assert(buf[sign] == '0');
3436 assert(buf[sign + 1] == 'x');
3437 skipped = 2;
3438 numnondigits -= 2;
3439 break;
3440 }
3441 if (skipped) {
3442 buf += skipped;
3443 len -= skipped;
3444 if (sign)
3445 buf[0] = '-';
3446 }
3447 assert(len == numnondigits + numdigits);
3448 assert(numdigits > 0);
3449 }
3450
3451 /* Fill with leading zeroes to meet minimum width. */
3452 if (prec > numdigits) {
3453 PyObject *r1 = PyString_FromStringAndSize(NULL,
3454 numnondigits + prec);
3455 char *b1;
3456 if (!r1) {
3457 Py_DECREF(result);
3458 return NULL;
3459 }
3460 b1 = PyString_AS_STRING(r1);
3461 for (i = 0; i < numnondigits; ++i)
3462 *b1++ = *buf++;
3463 for (i = 0; i < prec - numdigits; i++)
3464 *b1++ = '0';
3465 for (i = 0; i < numdigits; i++)
3466 *b1++ = *buf++;
3467 *b1 = '\0';
3468 Py_DECREF(result);
3469 result = r1;
3470 buf = PyString_AS_STRING(result);
3471 len = numnondigits + prec;
3472 }
3473
3474 /* Fix up case for hex conversions. */
3475 switch (type) {
3476 case 'x':
3477 /* Need to convert all upper case letters to lower case. */
3478 for (i = 0; i < len; i++)
3479 if (buf[i] >= 'A' && buf[i] <= 'F')
3480 buf[i] += 'a'-'A';
3481 break;
3482 case 'X':
3483 /* Need to convert 0x to 0X (and -0x to -0X). */
3484 if (buf[sign + 1] == 'x')
3485 buf[sign + 1] = 'X';
3486 break;
3487 }
3488 *pbuf = buf;
3489 *plen = len;
3490 return result;
3491}
3492
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003493static int
Fred Drakeba096332000-07-09 07:04:36 +00003494formatint(char *buf, size_t buflen, int flags,
3495 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003496{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003497 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003498 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3499 + 1 + 1 = 24 */
3500 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003501 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003502
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003503 x = PyInt_AsLong(v);
3504 if (x == -1 && PyErr_Occurred()) {
3505 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003506 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003507 }
Guido van Rossum078151d2002-08-11 04:24:12 +00003508 if (x < 0 && type != 'd' && type != 'i') {
Guido van Rossum54df53a2002-08-14 18:38:27 +00003509 if (PyErr_Warn(PyExc_FutureWarning,
Guido van Rossum078151d2002-08-11 04:24:12 +00003510 "%u/%o/%x/%X of negative int will return "
3511 "a signed string in Python 2.4 and up") < 0)
3512 return -1;
3513 }
Guido van Rossume5372401993-03-16 12:15:04 +00003514 if (prec < 0)
3515 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003516
3517 if ((flags & F_ALT) &&
3518 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003519 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003520 * of issues that cause pain:
3521 * - when 0 is being converted, the C standard leaves off
3522 * the '0x' or '0X', which is inconsistent with other
3523 * %#x/%#X conversions and inconsistent with Python's
3524 * hex() function
3525 * - there are platforms that violate the standard and
3526 * convert 0 with the '0x' or '0X'
3527 * (Metrowerks, Compaq Tru64)
3528 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003529 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003530 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003531 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003532 * We can achieve the desired consistency by inserting our
3533 * own '0x' or '0X' prefix, and substituting %x/%X in place
3534 * of %#x/%#X.
3535 *
3536 * Note that this is the same approach as used in
3537 * formatint() in unicodeobject.c
3538 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003539 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003540 type, prec, type);
3541 }
3542 else {
3543 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003544 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003545 prec, type);
3546 }
3547
Tim Peters38fd5b62000-09-21 05:43:11 +00003548 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003549 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3550 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003551 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003552 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003553 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003554 return -1;
3555 }
Tim Peters885d4572001-11-28 20:27:42 +00003556 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003557 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003558}
3559
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003560static int
Fred Drakeba096332000-07-09 07:04:36 +00003561formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003562{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003563 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003564 if (PyString_Check(v)) {
3565 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003566 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003567 }
3568 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003569 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003570 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003571 }
3572 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003573 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003574}
3575
Guido van Rossum013142a1994-08-30 08:19:36 +00003576
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003577/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3578
3579 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3580 chars are formatted. XXX This is a magic number. Each formatting
3581 routine does bounds checking to ensure no overflow, but a better
3582 solution may be to malloc a buffer of appropriate size for each
3583 format. For now, the current solution is sufficient.
3584*/
3585#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003586
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003587PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003588PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003589{
3590 char *fmt, *res;
3591 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003592 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003593 PyObject *result, *orig_args;
3594#ifdef Py_USING_UNICODE
3595 PyObject *v, *w;
3596#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003597 PyObject *dict = NULL;
3598 if (format == NULL || !PyString_Check(format) || args == NULL) {
3599 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003600 return NULL;
3601 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003602 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003603 fmt = PyString_AS_STRING(format);
3604 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003605 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003606 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003607 if (result == NULL)
3608 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003609 res = PyString_AsString(result);
3610 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003611 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003612 argidx = 0;
3613 }
3614 else {
3615 arglen = -1;
3616 argidx = -2;
3617 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003618 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003619 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003620 while (--fmtcnt >= 0) {
3621 if (*fmt != '%') {
3622 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003623 rescnt = fmtcnt + 100;
3624 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003625 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003626 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003627 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003628 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003629 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003630 }
3631 *res++ = *fmt++;
3632 }
3633 else {
3634 /* Got a format specifier */
3635 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003636 int width = -1;
3637 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003638 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003639 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003640 PyObject *v = NULL;
3641 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003642 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003643 int sign;
3644 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003645 char formatbuf[FORMATBUFLEN];
3646 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003647#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003648 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003649 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003650#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003651
Guido van Rossumda9c2711996-12-05 21:58:58 +00003652 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003653 if (*fmt == '(') {
3654 char *keystart;
3655 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003656 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003657 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003658
3659 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003660 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003661 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003662 goto error;
3663 }
3664 ++fmt;
3665 --fmtcnt;
3666 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003667 /* Skip over balanced parentheses */
3668 while (pcount > 0 && --fmtcnt >= 0) {
3669 if (*fmt == ')')
3670 --pcount;
3671 else if (*fmt == '(')
3672 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003673 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003674 }
3675 keylen = fmt - keystart - 1;
3676 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003677 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003678 "incomplete format key");
3679 goto error;
3680 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003681 key = PyString_FromStringAndSize(keystart,
3682 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003683 if (key == NULL)
3684 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003685 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003686 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003687 args_owned = 0;
3688 }
3689 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003690 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003691 if (args == NULL) {
3692 goto error;
3693 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003694 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003695 arglen = -1;
3696 argidx = -2;
3697 }
Guido van Rossume5372401993-03-16 12:15:04 +00003698 while (--fmtcnt >= 0) {
3699 switch (c = *fmt++) {
3700 case '-': flags |= F_LJUST; continue;
3701 case '+': flags |= F_SIGN; continue;
3702 case ' ': flags |= F_BLANK; continue;
3703 case '#': flags |= F_ALT; continue;
3704 case '0': flags |= F_ZERO; continue;
3705 }
3706 break;
3707 }
3708 if (c == '*') {
3709 v = getnextarg(args, arglen, &argidx);
3710 if (v == NULL)
3711 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003712 if (!PyInt_Check(v)) {
3713 PyErr_SetString(PyExc_TypeError,
3714 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003715 goto error;
3716 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003717 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003718 if (width < 0) {
3719 flags |= F_LJUST;
3720 width = -width;
3721 }
Guido van Rossume5372401993-03-16 12:15:04 +00003722 if (--fmtcnt >= 0)
3723 c = *fmt++;
3724 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003725 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003726 width = c - '0';
3727 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003728 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003729 if (!isdigit(c))
3730 break;
3731 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003732 PyErr_SetString(
3733 PyExc_ValueError,
3734 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003735 goto error;
3736 }
3737 width = width*10 + (c - '0');
3738 }
3739 }
3740 if (c == '.') {
3741 prec = 0;
3742 if (--fmtcnt >= 0)
3743 c = *fmt++;
3744 if (c == '*') {
3745 v = getnextarg(args, arglen, &argidx);
3746 if (v == NULL)
3747 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003748 if (!PyInt_Check(v)) {
3749 PyErr_SetString(
3750 PyExc_TypeError,
3751 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003752 goto error;
3753 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003754 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003755 if (prec < 0)
3756 prec = 0;
3757 if (--fmtcnt >= 0)
3758 c = *fmt++;
3759 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003760 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003761 prec = c - '0';
3762 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003763 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003764 if (!isdigit(c))
3765 break;
3766 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003767 PyErr_SetString(
3768 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003769 "prec too big");
3770 goto error;
3771 }
3772 prec = prec*10 + (c - '0');
3773 }
3774 }
3775 } /* prec */
3776 if (fmtcnt >= 0) {
3777 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003778 if (--fmtcnt >= 0)
3779 c = *fmt++;
3780 }
3781 }
3782 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003783 PyErr_SetString(PyExc_ValueError,
3784 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003785 goto error;
3786 }
3787 if (c != '%') {
3788 v = getnextarg(args, arglen, &argidx);
3789 if (v == NULL)
3790 goto error;
3791 }
3792 sign = 0;
3793 fill = ' ';
3794 switch (c) {
3795 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003796 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003797 len = 1;
3798 break;
3799 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003800 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003801#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003802 if (PyUnicode_Check(v)) {
3803 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003804 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003805 goto unicode;
3806 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003807#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003808 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003809 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003810 else
3811 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003812 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003813 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003814 if (!PyString_Check(temp)) {
3815 PyErr_SetString(PyExc_TypeError,
3816 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003817 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003818 goto error;
3819 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003820 pbuf = PyString_AS_STRING(temp);
3821 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003822 if (prec >= 0 && len > prec)
3823 len = prec;
3824 break;
3825 case 'i':
3826 case 'd':
3827 case 'u':
3828 case 'o':
3829 case 'x':
3830 case 'X':
3831 if (c == 'i')
3832 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003833 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003834 temp = _PyString_FormatLong(v, flags,
3835 prec, c, &pbuf, &len);
3836 if (!temp)
3837 goto error;
3838 /* unbounded ints can always produce
3839 a sign character! */
3840 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003841 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003842 else {
3843 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003844 len = formatint(pbuf,
3845 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003846 flags, prec, c, v);
3847 if (len < 0)
3848 goto error;
3849 /* only d conversion is signed */
3850 sign = c == 'd';
3851 }
3852 if (flags & F_ZERO)
3853 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003854 break;
3855 case 'e':
3856 case 'E':
3857 case 'f':
3858 case 'g':
3859 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003860 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003861 len = formatfloat(pbuf, sizeof(formatbuf),
3862 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003863 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003864 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003865 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003866 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003867 fill = '0';
3868 break;
3869 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003870 pbuf = formatbuf;
3871 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003872 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003873 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003874 break;
3875 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003876 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003877 "unsupported format character '%c' (0x%x) "
3878 "at index %i",
3879 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003880 goto error;
3881 }
3882 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003883 if (*pbuf == '-' || *pbuf == '+') {
3884 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003885 len--;
3886 }
3887 else if (flags & F_SIGN)
3888 sign = '+';
3889 else if (flags & F_BLANK)
3890 sign = ' ';
3891 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003892 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003893 }
3894 if (width < len)
3895 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003896 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003897 reslen -= rescnt;
3898 rescnt = width + fmtcnt + 100;
3899 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003900 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003901 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003902 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003903 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003904 }
3905 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003906 if (fill != ' ')
3907 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003908 rescnt--;
3909 if (width > len)
3910 width--;
3911 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003912 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3913 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003914 assert(pbuf[1] == c);
3915 if (fill != ' ') {
3916 *res++ = *pbuf++;
3917 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003918 }
Tim Petersfff53252001-04-12 18:38:48 +00003919 rescnt -= 2;
3920 width -= 2;
3921 if (width < 0)
3922 width = 0;
3923 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003924 }
3925 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003926 do {
3927 --rescnt;
3928 *res++ = fill;
3929 } while (--width > len);
3930 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003931 if (fill == ' ') {
3932 if (sign)
3933 *res++ = sign;
3934 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003935 (c == 'x' || c == 'X')) {
3936 assert(pbuf[0] == '0');
3937 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003938 *res++ = *pbuf++;
3939 *res++ = *pbuf++;
3940 }
3941 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003942 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003943 res += len;
3944 rescnt -= len;
3945 while (--width >= len) {
3946 --rescnt;
3947 *res++ = ' ';
3948 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003949 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003950 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003951 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003952 goto error;
3953 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003954 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003955 } /* '%' */
3956 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003957 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003958 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003959 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003960 goto error;
3961 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003962 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003963 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003964 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003965 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003966 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003967
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003968#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003969 unicode:
3970 if (args_owned) {
3971 Py_DECREF(args);
3972 args_owned = 0;
3973 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003974 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003975 if (PyTuple_Check(orig_args) && argidx > 0) {
3976 PyObject *v;
3977 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3978 v = PyTuple_New(n);
3979 if (v == NULL)
3980 goto error;
3981 while (--n >= 0) {
3982 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3983 Py_INCREF(w);
3984 PyTuple_SET_ITEM(v, n, w);
3985 }
3986 args = v;
3987 } else {
3988 Py_INCREF(orig_args);
3989 args = orig_args;
3990 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003991 args_owned = 1;
3992 /* Take what we have of the result and let the Unicode formatting
3993 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003994 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003995 if (_PyString_Resize(&result, rescnt))
3996 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003997 fmtcnt = PyString_GET_SIZE(format) - \
3998 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003999 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4000 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004001 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004002 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004003 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004004 if (v == NULL)
4005 goto error;
4006 /* Paste what we have (result) to what the Unicode formatting
4007 function returned (v) and return the result (or error) */
4008 w = PyUnicode_Concat(result, v);
4009 Py_DECREF(result);
4010 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004011 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004012 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004013#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004014
Guido van Rossume5372401993-03-16 12:15:04 +00004015 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004016 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004017 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004018 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004019 }
Guido van Rossume5372401993-03-16 12:15:04 +00004020 return NULL;
4021}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004022
Guido van Rossum2a61e741997-01-18 07:55:05 +00004023void
Fred Drakeba096332000-07-09 07:04:36 +00004024PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004025{
4026 register PyStringObject *s = (PyStringObject *)(*p);
4027 PyObject *t;
4028 if (s == NULL || !PyString_Check(s))
4029 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004030 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004031 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004032 if (interned == NULL) {
4033 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004034 if (interned == NULL) {
4035 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004036 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004037 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004038 }
4039 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4040 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004041 Py_DECREF(*p);
4042 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004043 return;
4044 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004045 /* Ensure that only true string objects appear in the intern dict */
4046 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004047 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4048 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004049 if (t == NULL) {
4050 PyErr_Clear();
4051 return;
Tim Peters111f6092001-09-12 07:54:51 +00004052 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004053 } else {
4054 t = (PyObject*) s;
4055 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004056 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004057
4058 if (PyDict_SetItem(interned, t, t) == 0) {
4059 /* The two references in interned are not counted by
4060 refcnt. The string deallocator will take care of this */
4061 ((PyObject *)t)->ob_refcnt-=2;
4062 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4063 Py_DECREF(*p);
4064 *p = t;
4065 return;
4066 }
4067 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004068 PyErr_Clear();
4069}
4070
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004071void
4072PyString_InternImmortal(PyObject **p)
4073{
4074 PyString_InternInPlace(p);
4075 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4076 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4077 Py_INCREF(*p);
4078 }
4079}
4080
Guido van Rossum2a61e741997-01-18 07:55:05 +00004081
4082PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004083PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004084{
4085 PyObject *s = PyString_FromString(cp);
4086 if (s == NULL)
4087 return NULL;
4088 PyString_InternInPlace(&s);
4089 return s;
4090}
4091
Guido van Rossum8cf04761997-08-02 02:57:45 +00004092void
Fred Drakeba096332000-07-09 07:04:36 +00004093PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004094{
4095 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004096 for (i = 0; i < UCHAR_MAX + 1; i++) {
4097 Py_XDECREF(characters[i]);
4098 characters[i] = NULL;
4099 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004100 Py_XDECREF(nullstring);
4101 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004102}
Barry Warsawa903ad982001-02-23 16:40:48 +00004103
Barry Warsawa903ad982001-02-23 16:40:48 +00004104void _Py_ReleaseInternedStrings(void)
4105{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004106 PyObject *keys;
4107 PyStringObject *s;
4108 int i, n;
4109
4110 if (interned == NULL || !PyDict_Check(interned))
4111 return;
4112 keys = PyDict_Keys(interned);
4113 if (keys == NULL || !PyList_Check(keys)) {
4114 PyErr_Clear();
4115 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004116 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004117
4118 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4119 detector, interned strings are not forcibly deallocated; rather, we
4120 give them their stolen references back, and then clear and DECREF
4121 the interned dict. */
4122
4123 fprintf(stderr, "releasing interned strings\n");
4124 n = PyList_GET_SIZE(keys);
4125 for (i = 0; i < n; i++) {
4126 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4127 switch (s->ob_sstate) {
4128 case SSTATE_NOT_INTERNED:
4129 /* XXX Shouldn't happen */
4130 break;
4131 case SSTATE_INTERNED_IMMORTAL:
4132 s->ob_refcnt += 1;
4133 break;
4134 case SSTATE_INTERNED_MORTAL:
4135 s->ob_refcnt += 2;
4136 break;
4137 default:
4138 Py_FatalError("Inconsistent interned string state.");
4139 }
4140 s->ob_sstate = SSTATE_NOT_INTERNED;
4141 }
4142 Py_DECREF(keys);
4143 PyDict_Clear(interned);
4144 Py_DECREF(interned);
4145 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004146}