blob: 2d695706f5b66fd708cea99eda6bcc7505abb0fa [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000025/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000028 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000029
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000033 For PyString_FromStringAndSize(), the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000043
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000044 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
47 is therefore equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000050*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000052PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000055 if (size == 0 && (op = nullstring) != NULL) {
56#ifdef COUNT_ALLOCS
57 null_strings++;
58#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000059 Py_INCREF(op);
60 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000062 if (size == 1 && str != NULL &&
63 (op = characters[*str & UCHAR_MAX]) != NULL)
64 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef COUNT_ALLOCS
66 one_strings++;
67#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000068 Py_INCREF(op);
69 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000070 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000071
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000072 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000073 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000074 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000130 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000136 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000158 int n = 0;
159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
164 memcpy(count, vargs, sizeof(va_list));
165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
179 /* skip the 'l' in %ld, since it doesn't change the
180 width. although only %d is supported (see
181 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000182 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000183 if (*f == 'l' && *(f+1) == 'd')
184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
193 case 'd': case 'i': case 'x':
194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
239 int i, longflag = 0;
240 /* parse the width.precision part (we're only
241 interested in the precision value, if any) */
242 n = 0;
243 while (isdigit(Py_CHARMASK(*f)))
244 n = (n*10) + *f++ - '0';
245 if (*f == '.') {
246 f++;
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 }
251 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
252 f++;
253 /* handle the long flag, but only for %ld. others
254 can be added when necessary. */
255 if (*f == 'l' && *(f+1) == 'd') {
256 longflag = 1;
257 ++f;
258 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000259
Barry Warsawdadace02001-08-24 18:32:06 +0000260 switch (*f) {
261 case 'c':
262 *s++ = va_arg(vargs, int);
263 break;
264 case 'd':
265 if (longflag)
266 sprintf(s, "%ld", va_arg(vargs, long));
267 else
268 sprintf(s, "%d", va_arg(vargs, int));
269 s += strlen(s);
270 break;
271 case 'i':
272 sprintf(s, "%i", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'x':
276 sprintf(s, "%x", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 's':
280 p = va_arg(vargs, char*);
281 i = strlen(p);
282 if (n > 0 && i > n)
283 i = n;
284 memcpy(s, p, i);
285 s += i;
286 break;
287 case 'p':
288 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000289 /* %p is ill-defined: ensure leading 0x. */
290 if (s[1] == 'X')
291 s[1] = 'x';
292 else if (s[1] != 'x') {
293 memmove(s+2, s, strlen(s)+1);
294 s[0] = '0';
295 s[1] = 'x';
296 }
Barry Warsawdadace02001-08-24 18:32:06 +0000297 s += strlen(s);
298 break;
299 case '%':
300 *s++ = '%';
301 break;
302 default:
303 strcpy(s, p);
304 s += strlen(s);
305 goto end;
306 }
307 } else
308 *s++ = *f;
309 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000310
Barry Warsawdadace02001-08-24 18:32:06 +0000311 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000312 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000313 return string;
314}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000315
Barry Warsawdadace02001-08-24 18:32:06 +0000316PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000317PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000318{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000319 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000320 va_list vargs;
321
322#ifdef HAVE_STDARG_PROTOTYPES
323 va_start(vargs, format);
324#else
325 va_start(vargs);
326#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000327 ret = PyString_FromFormatV(format, vargs);
328 va_end(vargs);
329 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000330}
331
332
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000333PyObject *PyString_Decode(const char *s,
334 int size,
335 const char *encoding,
336 const char *errors)
337{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000338 PyObject *v, *str;
339
340 str = PyString_FromStringAndSize(s, size);
341 if (str == NULL)
342 return NULL;
343 v = PyString_AsDecodedString(str, encoding, errors);
344 Py_DECREF(str);
345 return v;
346}
347
348PyObject *PyString_AsDecodedObject(PyObject *str,
349 const char *encoding,
350 const char *errors)
351{
352 PyObject *v;
353
354 if (!PyString_Check(str)) {
355 PyErr_BadArgument();
356 goto onError;
357 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000358
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000359 if (encoding == NULL) {
360#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000361 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000362#else
363 PyErr_SetString(PyExc_ValueError, "no encoding specified");
364 goto onError;
365#endif
366 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367
368 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369 v = PyCodec_Decode(str, encoding, errors);
370 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372
373 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000374
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376 return NULL;
377}
378
379PyObject *PyString_AsDecodedString(PyObject *str,
380 const char *encoding,
381 const char *errors)
382{
383 PyObject *v;
384
385 v = PyString_AsDecodedObject(str, encoding, errors);
386 if (v == NULL)
387 goto onError;
388
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000389#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000390 /* Convert Unicode to a string using the default encoding */
391 if (PyUnicode_Check(v)) {
392 PyObject *temp = v;
393 v = PyUnicode_AsEncodedString(v, NULL, NULL);
394 Py_DECREF(temp);
395 if (v == NULL)
396 goto onError;
397 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000398#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 if (!PyString_Check(v)) {
400 PyErr_Format(PyExc_TypeError,
401 "decoder did not return a string object (type=%.400s)",
402 v->ob_type->tp_name);
403 Py_DECREF(v);
404 goto onError;
405 }
406
407 return v;
408
409 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000410 return NULL;
411}
412
413PyObject *PyString_Encode(const char *s,
414 int size,
415 const char *encoding,
416 const char *errors)
417{
418 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000419
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000420 str = PyString_FromStringAndSize(s, size);
421 if (str == NULL)
422 return NULL;
423 v = PyString_AsEncodedString(str, encoding, errors);
424 Py_DECREF(str);
425 return v;
426}
427
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000428PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000429 const char *encoding,
430 const char *errors)
431{
432 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000433
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 if (!PyString_Check(str)) {
435 PyErr_BadArgument();
436 goto onError;
437 }
438
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000439 if (encoding == NULL) {
440#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000442#else
443 PyErr_SetString(PyExc_ValueError, "no encoding specified");
444 goto onError;
445#endif
446 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000447
448 /* Encode via the codec registry */
449 v = PyCodec_Encode(str, encoding, errors);
450 if (v == NULL)
451 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000452
453 return v;
454
455 onError:
456 return NULL;
457}
458
459PyObject *PyString_AsEncodedString(PyObject *str,
460 const char *encoding,
461 const char *errors)
462{
463 PyObject *v;
464
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000465 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000466 if (v == NULL)
467 goto onError;
468
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000469#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 /* Convert Unicode to a string using the default encoding */
471 if (PyUnicode_Check(v)) {
472 PyObject *temp = v;
473 v = PyUnicode_AsEncodedString(v, NULL, NULL);
474 Py_DECREF(temp);
475 if (v == NULL)
476 goto onError;
477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000478#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 if (!PyString_Check(v)) {
480 PyErr_Format(PyExc_TypeError,
481 "encoder did not return a string object (type=%.400s)",
482 v->ob_type->tp_name);
483 Py_DECREF(v);
484 goto onError;
485 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000488
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000489 onError:
490 return NULL;
491}
492
Guido van Rossum234f9421993-06-17 12:35:49 +0000493static void
Fred Drakeba096332000-07-09 07:04:36 +0000494string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000495{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000496 switch (PyString_CHECK_INTERNED(op)) {
497 case SSTATE_NOT_INTERNED:
498 break;
499
500 case SSTATE_INTERNED_MORTAL:
501 /* revive dead object temporarily for DelItem */
502 op->ob_refcnt = 3;
503 if (PyDict_DelItem(interned, op) != 0)
504 Py_FatalError(
505 "deletion of interned string failed");
506 break;
507
508 case SSTATE_INTERNED_IMMORTAL:
509 Py_FatalError("Immortal interned string died.");
510
511 default:
512 Py_FatalError("Inconsistent interned string state.");
513 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000514 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000515}
516
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000517/* Unescape a backslash-escaped string. If unicode is non-zero,
518 the string is a u-literal. If recode_encoding is non-zero,
519 the string is UTF-8 encoded and should be re-encoded in the
520 specified encoding. */
521
522PyObject *PyString_DecodeEscape(const char *s,
523 int len,
524 const char *errors,
525 int unicode,
526 const char *recode_encoding)
527{
528 int c;
529 char *p, *buf;
530 const char *end;
531 PyObject *v;
Walter Dörwald8709a422002-09-03 13:53:40 +0000532 int newlen = recode_encoding ? 4*len:len;
533 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 if (v == NULL)
535 return NULL;
536 p = buf = PyString_AsString(v);
537 end = s + len;
538 while (s < end) {
539 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000540 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000541#ifdef Py_USING_UNICODE
542 if (recode_encoding && (*s & 0x80)) {
543 PyObject *u, *w;
544 char *r;
545 const char* t;
546 int rn;
547 t = s;
548 /* Decode non-ASCII bytes as UTF-8. */
549 while (t < end && (*t & 0x80)) t++;
550 u = PyUnicode_DecodeUTF8(s, t - s, errors);
551 if(!u) goto failed;
552
553 /* Recode them in target encoding. */
554 w = PyUnicode_AsEncodedString(
555 u, recode_encoding, errors);
556 Py_DECREF(u);
557 if (!w) goto failed;
558
559 /* Append bytes to output buffer. */
560 r = PyString_AsString(w);
561 rn = PyString_Size(w);
562 memcpy(p, r, rn);
563 p += rn;
564 Py_DECREF(w);
565 s = t;
566 } else {
567 *p++ = *s++;
568 }
569#else
570 *p++ = *s++;
571#endif
572 continue;
573 }
574 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000575 if (s==end) {
576 PyErr_SetString(PyExc_ValueError,
577 "Trailing \\ in string");
578 goto failed;
579 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000580 switch (*s++) {
581 /* XXX This assumes ASCII! */
582 case '\n': break;
583 case '\\': *p++ = '\\'; break;
584 case '\'': *p++ = '\''; break;
585 case '\"': *p++ = '\"'; break;
586 case 'b': *p++ = '\b'; break;
587 case 'f': *p++ = '\014'; break; /* FF */
588 case 't': *p++ = '\t'; break;
589 case 'n': *p++ = '\n'; break;
590 case 'r': *p++ = '\r'; break;
591 case 'v': *p++ = '\013'; break; /* VT */
592 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
593 case '0': case '1': case '2': case '3':
594 case '4': case '5': case '6': case '7':
595 c = s[-1] - '0';
596 if ('0' <= *s && *s <= '7') {
597 c = (c<<3) + *s++ - '0';
598 if ('0' <= *s && *s <= '7')
599 c = (c<<3) + *s++ - '0';
600 }
601 *p++ = c;
602 break;
603 case 'x':
604 if (isxdigit(Py_CHARMASK(s[0]))
605 && isxdigit(Py_CHARMASK(s[1]))) {
606 unsigned int x = 0;
607 c = Py_CHARMASK(*s);
608 s++;
609 if (isdigit(c))
610 x = c - '0';
611 else if (islower(c))
612 x = 10 + c - 'a';
613 else
614 x = 10 + c - 'A';
615 x = x << 4;
616 c = Py_CHARMASK(*s);
617 s++;
618 if (isdigit(c))
619 x += c - '0';
620 else if (islower(c))
621 x += 10 + c - 'a';
622 else
623 x += 10 + c - 'A';
624 *p++ = x;
625 break;
626 }
627 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 PyErr_SetString(PyExc_ValueError,
629 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000630 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 }
632 if (strcmp(errors, "replace") == 0) {
633 *p++ = '?';
634 } else if (strcmp(errors, "ignore") == 0)
635 /* do nothing */;
636 else {
637 PyErr_Format(PyExc_ValueError,
638 "decoding error; "
639 "unknown error handling code: %.400s",
640 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000641 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000642 }
643#ifndef Py_USING_UNICODE
644 case 'u':
645 case 'U':
646 case 'N':
647 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000648 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000649 "Unicode escapes not legal "
650 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000651 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 }
653#endif
654 default:
655 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000656 s--;
657 goto non_esc; /* an arbitry number of unescaped
658 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000659 }
660 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000661 if (p-buf < newlen)
662 _PyString_Resize(&v, (int)(p - buf));
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 return v;
664 failed:
665 Py_DECREF(v);
666 return NULL;
667}
668
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000669static int
670string_getsize(register PyObject *op)
671{
672 char *s;
673 int len;
674 if (PyString_AsStringAndSize(op, &s, &len))
675 return -1;
676 return len;
677}
678
679static /*const*/ char *
680string_getbuffer(register PyObject *op)
681{
682 char *s;
683 int len;
684 if (PyString_AsStringAndSize(op, &s, &len))
685 return NULL;
686 return s;
687}
688
Guido van Rossumd7047b31995-01-02 19:07:15 +0000689int
Fred Drakeba096332000-07-09 07:04:36 +0000690PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000691{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000692 if (!PyString_Check(op))
693 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000694 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695}
696
697/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000698PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (!PyString_Check(op))
701 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000702 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703}
704
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705int
706PyString_AsStringAndSize(register PyObject *obj,
707 register char **s,
708 register int *len)
709{
710 if (s == NULL) {
711 PyErr_BadInternalCall();
712 return -1;
713 }
714
715 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000716#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000717 if (PyUnicode_Check(obj)) {
718 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
719 if (obj == NULL)
720 return -1;
721 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000722 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000723#endif
724 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000725 PyErr_Format(PyExc_TypeError,
726 "expected string or Unicode object, "
727 "%.200s found", obj->ob_type->tp_name);
728 return -1;
729 }
730 }
731
732 *s = PyString_AS_STRING(obj);
733 if (len != NULL)
734 *len = PyString_GET_SIZE(obj);
735 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
736 PyErr_SetString(PyExc_TypeError,
737 "expected string without null bytes");
738 return -1;
739 }
740 return 0;
741}
742
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000743/* Methods */
744
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000745static int
Fred Drakeba096332000-07-09 07:04:36 +0000746string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747{
748 int i;
749 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000750 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000751
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000752 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000753 if (! PyString_CheckExact(op)) {
754 int ret;
755 /* A str subclass may have its own __str__ method. */
756 op = (PyStringObject *) PyObject_Str((PyObject *)op);
757 if (op == NULL)
758 return -1;
759 ret = string_print(op, fp, flags);
760 Py_DECREF(op);
761 return ret;
762 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000763 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000764#ifdef __VMS
765 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
766#else
767 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
768#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000769 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771
Thomas Wouters7e474022000-07-16 12:04:32 +0000772 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000774 if (memchr(op->ob_sval, '\'', op->ob_size) &&
775 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000776 quote = '"';
777
778 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 for (i = 0; i < op->ob_size; i++) {
780 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000781 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000782 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000783 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000784 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000785 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\r");
789 else if (c < ' ' || c >= 0x7f)
790 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000791 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000796}
797
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000798PyObject *
799PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000801 register PyStringObject* op = (PyStringObject*) obj;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000802 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
803 PyObject *v;
804 if (newsize > INT_MAX) {
805 PyErr_SetString(PyExc_OverflowError,
806 "string is too large to make repr");
807 }
808 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000810 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 }
812 else {
813 register int i;
814 register char c;
815 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 int quote;
817
Thomas Wouters7e474022000-07-16 12:04:32 +0000818 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000820 if (smartquotes &&
821 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000822 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '"';
824
Tim Peters9161c8b2001-12-03 01:55:38 +0000825 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000828 /* There's at least enough room for a hex escape
829 and a closing quote. */
830 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000833 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000834 else if (c == '\t')
835 *p++ = '\\', *p++ = 't';
836 else if (c == '\n')
837 *p++ = '\\', *p++ = 'n';
838 else if (c == '\r')
839 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000840 else if (c < ' ' || c >= 0x7f) {
841 /* For performance, we don't want to call
842 PyOS_snprintf here (extra layers of
843 function call). */
844 sprintf(p, "\\x%02x", c & 0xff);
845 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 else
848 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000850 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000851 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000853 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000855 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857}
858
Guido van Rossum189f1df2001-05-01 16:51:53 +0000859static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000860string_repr(PyObject *op)
861{
862 return PyString_Repr(op, 1);
863}
864
865static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000866string_str(PyObject *s)
867{
Tim Petersc9933152001-10-16 20:18:24 +0000868 assert(PyString_Check(s));
869 if (PyString_CheckExact(s)) {
870 Py_INCREF(s);
871 return s;
872 }
873 else {
874 /* Subtype -- return genuine string with the same value. */
875 PyStringObject *t = (PyStringObject *) s;
876 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
877 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000878}
879
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880static int
Fred Drakeba096332000-07-09 07:04:36 +0000881string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882{
883 return a->ob_size;
884}
885
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000886static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000887string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888{
889 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890 register PyStringObject *op;
891 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000892#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000893 if (PyUnicode_Check(bb))
894 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000895#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000896 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000897 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000898 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 return NULL;
900 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000901#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000902 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000903 if ((a->ob_size == 0 || b->ob_size == 0) &&
904 PyString_CheckExact(a) && PyString_CheckExact(b)) {
905 if (a->ob_size == 0) {
906 Py_INCREF(bb);
907 return bb;
908 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000909 Py_INCREF(a);
910 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
912 size = a->ob_size + b->ob_size;
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000913 /* Inline PyObject_NewVar */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000915 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000916 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000917 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000918 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000919 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000920 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000921 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
922 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
923 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925#undef b
926}
927
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000929string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930{
931 register int i;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000932 register int j;
Guido van Rossum2095d241997-04-09 19:41:24 +0000933 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000935 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 if (n < 0)
937 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000938 /* watch out for overflows: the size can overflow int,
939 * and the # of bytes needed can overflow size_t
940 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000941 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000942 if (n && size / n != a->ob_size) {
943 PyErr_SetString(PyExc_OverflowError,
944 "repeated string is too long");
945 return NULL;
946 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000947 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000948 Py_INCREF(a);
949 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950 }
Tim Peters8f422462000-09-09 06:13:41 +0000951 nbytes = size * sizeof(char);
952 if (nbytes / sizeof(char) != (size_t)size ||
953 nbytes + sizeof(PyStringObject) <= nbytes) {
954 PyErr_SetString(PyExc_OverflowError,
955 "repeated string is too long");
956 return NULL;
957 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000959 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000960 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000961 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000962 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000963 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000964 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000965 op->ob_sval[size] = '\0';
966 if (a->ob_size == 1 && n > 0) {
967 memset(op->ob_sval, a->ob_sval[0] , n);
968 return (PyObject *) op;
969 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000970 i = 0;
971 if (i < size) {
972 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
973 i = (int) a->ob_size;
974 }
975 while (i < size) {
976 j = (i <= size-i) ? i : size-i;
977 memcpy(op->ob_sval+i, op->ob_sval, j);
978 i += j;
979 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000980 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000981}
982
983/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
984
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000986string_slice(register PyStringObject *a, register int i, register int j)
987 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000988{
989 if (i < 0)
990 i = 0;
991 if (j < 0)
992 j = 0; /* Avoid signed/unsigned bug in next line */
993 if (j > a->ob_size)
994 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000995 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
996 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 Py_INCREF(a);
998 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999 }
1000 if (j < i)
1001 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001002 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003}
1004
Guido van Rossum9284a572000-03-07 15:53:43 +00001005static int
Fred Drakeba096332000-07-09 07:04:36 +00001006string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001007{
Barry Warsaw817918c2002-08-06 16:58:21 +00001008 const char *lhs, *rhs, *end;
1009 int size;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001010
1011 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001012#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001013 if (PyUnicode_Check(el))
1014 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001015#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001016 if (!PyString_Check(el)) {
1017 PyErr_SetString(PyExc_TypeError,
1018 "'in <string>' requires string as left operand");
1019 return -1;
1020 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001021 }
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001022 size = PyString_GET_SIZE(el);
Barry Warsaw817918c2002-08-06 16:58:21 +00001023 rhs = PyString_AS_STRING(el);
1024 lhs = PyString_AS_STRING(a);
1025
1026 /* optimize for a single character */
1027 if (size == 1)
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001028 return memchr(lhs, *rhs, PyString_GET_SIZE(a)) != NULL;
Barry Warsaw817918c2002-08-06 16:58:21 +00001029
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001030 end = lhs + (PyString_GET_SIZE(a) - size);
Barry Warsaw817918c2002-08-06 16:58:21 +00001031 while (lhs <= end) {
1032 if (memcmp(lhs++, rhs, size) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001033 return 1;
1034 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001035
Guido van Rossum9284a572000-03-07 15:53:43 +00001036 return 0;
1037}
1038
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001039static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001040string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001041{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001042 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001043 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001044 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001045 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001046 return NULL;
1047 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001048 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001049 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001050 if (v == NULL)
1051 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001052 else {
1053#ifdef COUNT_ALLOCS
1054 one_strings++;
1055#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001056 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001057 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001058 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059}
1060
Martin v. Löwiscd353062001-05-24 16:56:35 +00001061static PyObject*
1062string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001063{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001064 int c;
1065 int len_a, len_b;
1066 int min_len;
1067 PyObject *result;
1068
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001069 /* Make sure both arguments are strings. */
1070 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001071 result = Py_NotImplemented;
1072 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001073 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074 if (a == b) {
1075 switch (op) {
1076 case Py_EQ:case Py_LE:case Py_GE:
1077 result = Py_True;
1078 goto out;
1079 case Py_NE:case Py_LT:case Py_GT:
1080 result = Py_False;
1081 goto out;
1082 }
1083 }
1084 if (op == Py_EQ) {
1085 /* Supporting Py_NE here as well does not save
1086 much time, since Py_NE is rarely used. */
1087 if (a->ob_size == b->ob_size
1088 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001089 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001090 a->ob_size) == 0)) {
1091 result = Py_True;
1092 } else {
1093 result = Py_False;
1094 }
1095 goto out;
1096 }
1097 len_a = a->ob_size; len_b = b->ob_size;
1098 min_len = (len_a < len_b) ? len_a : len_b;
1099 if (min_len > 0) {
1100 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1101 if (c==0)
1102 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1103 }else
1104 c = 0;
1105 if (c == 0)
1106 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1107 switch (op) {
1108 case Py_LT: c = c < 0; break;
1109 case Py_LE: c = c <= 0; break;
1110 case Py_EQ: assert(0); break; /* unreachable */
1111 case Py_NE: c = c != 0; break;
1112 case Py_GT: c = c > 0; break;
1113 case Py_GE: c = c >= 0; break;
1114 default:
1115 result = Py_NotImplemented;
1116 goto out;
1117 }
1118 result = c ? Py_True : Py_False;
1119 out:
1120 Py_INCREF(result);
1121 return result;
1122}
1123
1124int
1125_PyString_Eq(PyObject *o1, PyObject *o2)
1126{
1127 PyStringObject *a, *b;
1128 a = (PyStringObject*)o1;
1129 b = (PyStringObject*)o2;
1130 return a->ob_size == b->ob_size
1131 && *a->ob_sval == *b->ob_sval
1132 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001133}
1134
Guido van Rossum9bfef441993-03-29 10:43:31 +00001135static long
Fred Drakeba096332000-07-09 07:04:36 +00001136string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001137{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001138 register int len;
1139 register unsigned char *p;
1140 register long x;
1141
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001142 if (a->ob_shash != -1)
1143 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001144 len = a->ob_size;
1145 p = (unsigned char *) a->ob_sval;
1146 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001147 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001148 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001149 x ^= a->ob_size;
1150 if (x == -1)
1151 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001152 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001153 return x;
1154}
1155
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001156static PyObject*
1157string_subscript(PyStringObject* self, PyObject* item)
1158{
1159 if (PyInt_Check(item)) {
1160 long i = PyInt_AS_LONG(item);
1161 if (i < 0)
1162 i += PyString_GET_SIZE(self);
1163 return string_item(self,i);
1164 }
1165 else if (PyLong_Check(item)) {
1166 long i = PyLong_AsLong(item);
1167 if (i == -1 && PyErr_Occurred())
1168 return NULL;
1169 if (i < 0)
1170 i += PyString_GET_SIZE(self);
1171 return string_item(self,i);
1172 }
1173 else if (PySlice_Check(item)) {
1174 int start, stop, step, slicelength, cur, i;
1175 char* source_buf;
1176 char* result_buf;
1177 PyObject* result;
1178
1179 if (PySlice_GetIndicesEx((PySliceObject*)item,
1180 PyString_GET_SIZE(self),
1181 &start, &stop, &step, &slicelength) < 0) {
1182 return NULL;
1183 }
1184
1185 if (slicelength <= 0) {
1186 return PyString_FromStringAndSize("", 0);
1187 }
1188 else {
1189 source_buf = PyString_AsString((PyObject*)self);
1190 result_buf = PyMem_Malloc(slicelength);
1191
1192 for (cur = start, i = 0; i < slicelength;
1193 cur += step, i++) {
1194 result_buf[i] = source_buf[cur];
1195 }
1196
1197 result = PyString_FromStringAndSize(result_buf,
1198 slicelength);
1199 PyMem_Free(result_buf);
1200 return result;
1201 }
1202 }
1203 else {
1204 PyErr_SetString(PyExc_TypeError,
1205 "string indices must be integers");
1206 return NULL;
1207 }
1208}
1209
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001210static int
Fred Drakeba096332000-07-09 07:04:36 +00001211string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001212{
1213 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001214 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001215 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001216 return -1;
1217 }
1218 *ptr = (void *)self->ob_sval;
1219 return self->ob_size;
1220}
1221
1222static int
Fred Drakeba096332000-07-09 07:04:36 +00001223string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001224{
Guido van Rossum045e6881997-09-08 18:30:11 +00001225 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001226 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001227 return -1;
1228}
1229
1230static int
Fred Drakeba096332000-07-09 07:04:36 +00001231string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001232{
1233 if ( lenp )
1234 *lenp = self->ob_size;
1235 return 1;
1236}
1237
Guido van Rossum1db70701998-10-08 02:18:52 +00001238static int
Fred Drakeba096332000-07-09 07:04:36 +00001239string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001240{
1241 if ( index != 0 ) {
1242 PyErr_SetString(PyExc_SystemError,
1243 "accessing non-existent string segment");
1244 return -1;
1245 }
1246 *ptr = self->ob_sval;
1247 return self->ob_size;
1248}
1249
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001250static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001251 (inquiry)string_length, /*sq_length*/
1252 (binaryfunc)string_concat, /*sq_concat*/
1253 (intargfunc)string_repeat, /*sq_repeat*/
1254 (intargfunc)string_item, /*sq_item*/
1255 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001256 0, /*sq_ass_item*/
1257 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001258 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001259};
1260
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001261static PyMappingMethods string_as_mapping = {
1262 (inquiry)string_length,
1263 (binaryfunc)string_subscript,
1264 0,
1265};
1266
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001267static PyBufferProcs string_as_buffer = {
1268 (getreadbufferproc)string_buffer_getreadbuf,
1269 (getwritebufferproc)string_buffer_getwritebuf,
1270 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001271 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001272};
1273
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001274
1275
/* Strip kinds; the values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",	/* LEFTSTRIP */
	"|O:rstrip",	/* RIGHTSTRIP */
	"|O:strip"	/* BOTHSTRIP */
};

/* The bare method name: the format string minus its 3-char "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001284
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001285
1286static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001287split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001289 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290 PyObject* item;
1291 PyObject *list = PyList_New(0);
1292
1293 if (list == NULL)
1294 return NULL;
1295
Guido van Rossum4c08d552000-03-10 22:55:18 +00001296 for (i = j = 0; i < len; ) {
1297 while (i < len && isspace(Py_CHARMASK(s[i])))
1298 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001300 while (i < len && !isspace(Py_CHARMASK(s[i])))
1301 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001303 if (maxsplit-- <= 0)
1304 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001305 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1306 if (item == NULL)
1307 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001308 err = PyList_Append(list, item);
1309 Py_DECREF(item);
1310 if (err < 0)
1311 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001312 while (i < len && isspace(Py_CHARMASK(s[i])))
1313 i++;
1314 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315 }
1316 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001317 if (j < len) {
1318 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1319 if (item == NULL)
1320 goto finally;
1321 err = PyList_Append(list, item);
1322 Py_DECREF(item);
1323 if (err < 0)
1324 goto finally;
1325 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001326 return list;
1327 finally:
1328 Py_DECREF(list);
1329 return NULL;
1330}
1331
1332
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001333PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334"S.split([sep [,maxsplit]]) -> list of strings\n\
1335\n\
1336Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001337delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001338splits are done. If sep is not specified or is None, any\n\
1339whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340
1341static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001342string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001343{
1344 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 int maxsplit = -1;
1346 const char *s = PyString_AS_STRING(self), *sub;
1347 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001351 if (maxsplit < 0)
1352 maxsplit = INT_MAX;
1353 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001354 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001355 if (PyString_Check(subobj)) {
1356 sub = PyString_AS_STRING(subobj);
1357 n = PyString_GET_SIZE(subobj);
1358 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001359#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001360 else if (PyUnicode_Check(subobj))
1361 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001362#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001363 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1364 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365 if (n == 0) {
1366 PyErr_SetString(PyExc_ValueError, "empty separator");
1367 return NULL;
1368 }
1369
1370 list = PyList_New(0);
1371 if (list == NULL)
1372 return NULL;
1373
1374 i = j = 0;
1375 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001376 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001377 if (maxsplit-- <= 0)
1378 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1380 if (item == NULL)
1381 goto fail;
1382 err = PyList_Append(list, item);
1383 Py_DECREF(item);
1384 if (err < 0)
1385 goto fail;
1386 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387 }
1388 else
1389 i++;
1390 }
1391 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1392 if (item == NULL)
1393 goto fail;
1394 err = PyList_Append(list, item);
1395 Py_DECREF(item);
1396 if (err < 0)
1397 goto fail;
1398
1399 return list;
1400
1401 fail:
1402 Py_DECREF(list);
1403 return NULL;
1404}
1405
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001406static PyObject *
1407rsplit_whitespace(const char *s, int len, int maxsplit)
1408{
1409 int i, j, err;
1410 PyObject* item;
1411 PyObject *list = PyList_New(0);
1412
1413 if (list == NULL)
1414 return NULL;
1415
1416 for (i = j = len - 1; i >= 0; ) {
1417 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1418 i--;
1419 j = i;
1420 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1421 i--;
1422 if (j > i) {
1423 if (maxsplit-- <= 0)
1424 break;
1425 item = PyString_FromStringAndSize(s+i+1, (int)(j-i));
1426 if (item == NULL)
1427 goto finally;
1428 err = PyList_Insert(list, 0, item);
1429 Py_DECREF(item);
1430 if (err < 0)
1431 goto finally;
1432 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1433 i--;
1434 j = i;
1435 }
1436 }
1437 if (j >= 0) {
1438 item = PyString_FromStringAndSize(s, (int)(j + 1));
1439 if (item == NULL)
1440 goto finally;
1441 err = PyList_Insert(list, 0, item);
1442 Py_DECREF(item);
1443 if (err < 0)
1444 goto finally;
1445 }
1446 return list;
1447 finally:
1448 Py_DECREF(list);
1449 return NULL;
1450}
1451
1452
1453PyDoc_STRVAR(rsplit__doc__,
1454"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1455\n\
1456Return a list of the words in the string S, using sep as the\n\
1457delimiter string, starting at the end of the string and working\n\
1458to the front. If maxsplit is given, at most maxsplit splits are\n\
1459done. If sep is not specified or is None, any whitespace string\n\
1460is a separator.");
1461
1462static PyObject *
1463string_rsplit(PyStringObject *self, PyObject *args)
1464{
1465 int len = PyString_GET_SIZE(self), n, i, j, err;
1466 int maxsplit = -1;
1467 const char *s = PyString_AS_STRING(self), *sub;
1468 PyObject *list, *item, *subobj = Py_None;
1469
1470 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1471 return NULL;
1472 if (maxsplit < 0)
1473 maxsplit = INT_MAX;
1474 if (subobj == Py_None)
1475 return rsplit_whitespace(s, len, maxsplit);
1476 if (PyString_Check(subobj)) {
1477 sub = PyString_AS_STRING(subobj);
1478 n = PyString_GET_SIZE(subobj);
1479 }
1480#ifdef Py_USING_UNICODE
1481 else if (PyUnicode_Check(subobj))
1482 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1483#endif
1484 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1485 return NULL;
1486 if (n == 0) {
1487 PyErr_SetString(PyExc_ValueError, "empty separator");
1488 return NULL;
1489 }
1490
1491 list = PyList_New(0);
1492 if (list == NULL)
1493 return NULL;
1494
1495 j = len;
1496 i = j - n;
1497 while (i >= 0) {
1498 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1499 if (maxsplit-- <= 0)
1500 break;
1501 item = PyString_FromStringAndSize(s+i+n, (int)(j-i-n));
1502 if (item == NULL)
1503 goto fail;
1504 err = PyList_Insert(list, 0, item);
1505 Py_DECREF(item);
1506 if (err < 0)
1507 goto fail;
1508 j = i;
1509 i -= n;
1510 }
1511 else
1512 i--;
1513 }
1514 item = PyString_FromStringAndSize(s, j);
1515 if (item == NULL)
1516 goto fail;
1517 err = PyList_Insert(list, 0, item);
1518 Py_DECREF(item);
1519 if (err < 0)
1520 goto fail;
1521
1522 return list;
1523
1524 fail:
1525 Py_DECREF(list);
1526 return NULL;
1527}
1528
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001529
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001530PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531"S.join(sequence) -> string\n\
1532\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001533Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001534sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001535
1536static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001537string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001538{
1539 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001540 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001541 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542 char *p;
1543 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001544 size_t sz = 0;
1545 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001546 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547
Tim Peters19fe14e2001-01-19 03:03:47 +00001548 seq = PySequence_Fast(orig, "");
1549 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001550 if (PyErr_ExceptionMatches(PyExc_TypeError))
1551 PyErr_Format(PyExc_TypeError,
1552 "sequence expected, %.80s found",
1553 orig->ob_type->tp_name);
1554 return NULL;
1555 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001556
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001557 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001558 if (seqlen == 0) {
1559 Py_DECREF(seq);
1560 return PyString_FromString("");
1561 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001562 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001563 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001564 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1565 PyErr_Format(PyExc_TypeError,
1566 "sequence item 0: expected string,"
1567 " %.80s found",
1568 item->ob_type->tp_name);
1569 Py_DECREF(seq);
1570 return NULL;
1571 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001572 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001573 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001574 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001576
Tim Peters19fe14e2001-01-19 03:03:47 +00001577 /* There are at least two things to join. Do a pre-pass to figure out
1578 * the total amount of space we'll need (sz), see whether any argument
1579 * is absurd, and defer to the Unicode join if appropriate.
1580 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001581 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001582 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001583 item = PySequence_Fast_GET_ITEM(seq, i);
1584 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001585#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001586 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001587 /* Defer to Unicode join.
1588 * CAUTION: There's no gurantee that the
1589 * original sequence can be iterated over
1590 * again, so we must pass seq here.
1591 */
1592 PyObject *result;
1593 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001594 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001595 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001596 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001597#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001598 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001599 "sequence item %i: expected string,"
1600 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001601 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001602 Py_DECREF(seq);
1603 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001604 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001605 sz += PyString_GET_SIZE(item);
1606 if (i != 0)
1607 sz += seplen;
1608 if (sz < old_sz || sz > INT_MAX) {
1609 PyErr_SetString(PyExc_OverflowError,
1610 "join() is too long for a Python string");
1611 Py_DECREF(seq);
1612 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001614 }
1615
1616 /* Allocate result space. */
1617 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1618 if (res == NULL) {
1619 Py_DECREF(seq);
1620 return NULL;
1621 }
1622
1623 /* Catenate everything. */
1624 p = PyString_AS_STRING(res);
1625 for (i = 0; i < seqlen; ++i) {
1626 size_t n;
1627 item = PySequence_Fast_GET_ITEM(seq, i);
1628 n = PyString_GET_SIZE(item);
1629 memcpy(p, PyString_AS_STRING(item), n);
1630 p += n;
1631 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001632 memcpy(p, sep, seplen);
1633 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001634 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001636
Jeremy Hylton49048292000-07-11 03:28:17 +00001637 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639}
1640
Tim Peters52e155e2001-06-16 05:42:57 +00001641PyObject *
1642_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001643{
Tim Petersa7259592001-06-16 05:11:17 +00001644 assert(sep != NULL && PyString_Check(sep));
1645 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001646 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001647}
1648
/* Normalize a (start, end) index pair in place against a string of
   length len.

   *end:   values above len are capped at len; negative values have len
           added and, if still negative, become 0.
   *start: negative values have len added and, if still negative,
           become 0.  Note that *start is NOT capped at len; callers must
           cope with start > end. */
static void
string_adjust_indices(int *start, int *end, int len)
{
    int v;

    /* Upper bound first, written back before start is touched. */
    v = *end;
    v = (v > len) ? len : (v < 0 ? v + len : v);
    if (v < 0)
        v = 0;
    *end = v;

    /* Lower bound: only negative values need any adjustment. */
    v = *start;
    if (v < 0) {
        v += len;
        if (v < 0)
            v = 0;
    }
    *start = v;
}
1663
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001664static long
Fred Drakeba096332000-07-09 07:04:36 +00001665string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001666{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001667 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668 int len = PyString_GET_SIZE(self);
1669 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001670 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001671
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001672 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001673 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001674 return -2;
1675 if (PyString_Check(subobj)) {
1676 sub = PyString_AS_STRING(subobj);
1677 n = PyString_GET_SIZE(subobj);
1678 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001679#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001680 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001681 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001682#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684 return -2;
1685
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001686 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687
Guido van Rossum4c08d552000-03-10 22:55:18 +00001688 if (dir > 0) {
1689 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001690 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001691 last -= n;
1692 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001693 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001694 return (long)i;
1695 }
1696 else {
1697 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001698
Guido van Rossum4c08d552000-03-10 22:55:18 +00001699 if (n == 0 && i <= last)
1700 return (long)last;
1701 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001702 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703 return (long)j;
1704 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001705
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706 return -1;
1707}
1708
1709
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001710PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001711"S.find(sub [,start [,end]]) -> int\n\
1712\n\
1713Return the lowest index in S where substring sub is found,\n\
1714such that sub is contained within s[start,end]. Optional\n\
1715arguments start and end are interpreted as in slice notation.\n\
1716\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001717Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718
1719static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001720string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001722 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723 if (result == -2)
1724 return NULL;
1725 return PyInt_FromLong(result);
1726}
1727
1728
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001729PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730"S.index(sub [,start [,end]]) -> int\n\
1731\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001732Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733
1734static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001735string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738 if (result == -2)
1739 return NULL;
1740 if (result == -1) {
1741 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001742 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743 return NULL;
1744 }
1745 return PyInt_FromLong(result);
1746}
1747
1748
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001749PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750"S.rfind(sub [,start [,end]]) -> int\n\
1751\n\
1752Return the highest index in S where substring sub is found,\n\
1753such that sub is contained within s[start,end]. Optional\n\
1754arguments start and end are interpreted as in slice notation.\n\
1755\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001756Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757
1758static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001759string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001761 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001762 if (result == -2)
1763 return NULL;
1764 return PyInt_FromLong(result);
1765}
1766
1767
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001768PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001769"S.rindex(sub [,start [,end]]) -> int\n\
1770\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001771Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001772
1773static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001774string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001776 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001777 if (result == -2)
1778 return NULL;
1779 if (result == -1) {
1780 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001781 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782 return NULL;
1783 }
1784 return PyInt_FromLong(result);
1785}
1786
1787
1788static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001789do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1790{
1791 char *s = PyString_AS_STRING(self);
1792 int len = PyString_GET_SIZE(self);
1793 char *sep = PyString_AS_STRING(sepobj);
1794 int seplen = PyString_GET_SIZE(sepobj);
1795 int i, j;
1796
1797 i = 0;
1798 if (striptype != RIGHTSTRIP) {
1799 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1800 i++;
1801 }
1802 }
1803
1804 j = len;
1805 if (striptype != LEFTSTRIP) {
1806 do {
1807 j--;
1808 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1809 j++;
1810 }
1811
1812 if (i == 0 && j == len && PyString_CheckExact(self)) {
1813 Py_INCREF(self);
1814 return (PyObject*)self;
1815 }
1816 else
1817 return PyString_FromStringAndSize(s+i, j-i);
1818}
1819
1820
1821static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001822do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823{
1824 char *s = PyString_AS_STRING(self);
1825 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 i = 0;
1828 if (striptype != RIGHTSTRIP) {
1829 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1830 i++;
1831 }
1832 }
1833
1834 j = len;
1835 if (striptype != LEFTSTRIP) {
1836 do {
1837 j--;
1838 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1839 j++;
1840 }
1841
Tim Peters8fa5dd02001-09-12 02:18:30 +00001842 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 Py_INCREF(self);
1844 return (PyObject*)self;
1845 }
1846 else
1847 return PyString_FromStringAndSize(s+i, j-i);
1848}
1849
1850
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001851static PyObject *
1852do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1853{
1854 PyObject *sep = NULL;
1855
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001856 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001857 return NULL;
1858
1859 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001860 if (PyString_Check(sep))
1861 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001862#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001863 else if (PyUnicode_Check(sep)) {
1864 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1865 PyObject *res;
1866 if (uniself==NULL)
1867 return NULL;
1868 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1869 striptype, sep);
1870 Py_DECREF(uniself);
1871 return res;
1872 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001873#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001874 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001875 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001876#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001877 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001878#else
1879 "%s arg must be None or str",
1880#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001881 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001882 return NULL;
1883 }
1884 return do_xstrip(self, striptype, sep);
1885 }
1886
1887 return do_strip(self, striptype);
1888}
1889
1890
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001891PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001892"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893\n\
1894Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001895whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001896If chars is given and not None, remove characters in chars instead.\n\
1897If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898
1899static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001900string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001902 if (PyTuple_GET_SIZE(args) == 0)
1903 return do_strip(self, BOTHSTRIP); /* Common case */
1904 else
1905 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906}
1907
1908
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001909PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001910"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001911\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001912Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001913If chars is given and not None, remove characters in chars instead.\n\
1914If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915
1916static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001917string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001919 if (PyTuple_GET_SIZE(args) == 0)
1920 return do_strip(self, LEFTSTRIP); /* Common case */
1921 else
1922 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923}
1924
1925
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001926PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001927"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001929Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001930If chars is given and not None, remove characters in chars instead.\n\
1931If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932
1933static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001934string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001936 if (PyTuple_GET_SIZE(args) == 0)
1937 return do_strip(self, RIGHTSTRIP); /* Common case */
1938 else
1939 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940}
1941
1942
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001943PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944"S.lower() -> string\n\
1945\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001946Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947
1948static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001949string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001950{
1951 char *s = PyString_AS_STRING(self), *s_new;
1952 int i, n = PyString_GET_SIZE(self);
1953 PyObject *new;
1954
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955 new = PyString_FromStringAndSize(NULL, n);
1956 if (new == NULL)
1957 return NULL;
1958 s_new = PyString_AsString(new);
1959 for (i = 0; i < n; i++) {
1960 int c = Py_CHARMASK(*s++);
1961 if (isupper(c)) {
1962 *s_new = tolower(c);
1963 } else
1964 *s_new = c;
1965 s_new++;
1966 }
1967 return new;
1968}
1969
1970
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001971PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972"S.upper() -> string\n\
1973\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001974Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001975
1976static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001977string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978{
1979 char *s = PyString_AS_STRING(self), *s_new;
1980 int i, n = PyString_GET_SIZE(self);
1981 PyObject *new;
1982
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983 new = PyString_FromStringAndSize(NULL, n);
1984 if (new == NULL)
1985 return NULL;
1986 s_new = PyString_AsString(new);
1987 for (i = 0; i < n; i++) {
1988 int c = Py_CHARMASK(*s++);
1989 if (islower(c)) {
1990 *s_new = toupper(c);
1991 } else
1992 *s_new = c;
1993 s_new++;
1994 }
1995 return new;
1996}
1997
1998
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001999PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002000"S.title() -> string\n\
2001\n\
2002Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002003characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002004
2005static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002006string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002007{
2008 char *s = PyString_AS_STRING(self), *s_new;
2009 int i, n = PyString_GET_SIZE(self);
2010 int previous_is_cased = 0;
2011 PyObject *new;
2012
Guido van Rossum4c08d552000-03-10 22:55:18 +00002013 new = PyString_FromStringAndSize(NULL, n);
2014 if (new == NULL)
2015 return NULL;
2016 s_new = PyString_AsString(new);
2017 for (i = 0; i < n; i++) {
2018 int c = Py_CHARMASK(*s++);
2019 if (islower(c)) {
2020 if (!previous_is_cased)
2021 c = toupper(c);
2022 previous_is_cased = 1;
2023 } else if (isupper(c)) {
2024 if (previous_is_cased)
2025 c = tolower(c);
2026 previous_is_cased = 1;
2027 } else
2028 previous_is_cased = 0;
2029 *s_new++ = c;
2030 }
2031 return new;
2032}
2033
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002034PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035"S.capitalize() -> string\n\
2036\n\
2037Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002038capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039
2040static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002041string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042{
2043 char *s = PyString_AS_STRING(self), *s_new;
2044 int i, n = PyString_GET_SIZE(self);
2045 PyObject *new;
2046
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047 new = PyString_FromStringAndSize(NULL, n);
2048 if (new == NULL)
2049 return NULL;
2050 s_new = PyString_AsString(new);
2051 if (0 < n) {
2052 int c = Py_CHARMASK(*s++);
2053 if (islower(c))
2054 *s_new = toupper(c);
2055 else
2056 *s_new = c;
2057 s_new++;
2058 }
2059 for (i = 1; i < n; i++) {
2060 int c = Py_CHARMASK(*s++);
2061 if (isupper(c))
2062 *s_new = tolower(c);
2063 else
2064 *s_new = c;
2065 s_new++;
2066 }
2067 return new;
2068}
2069
2070
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002071PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072"S.count(sub[, start[, end]]) -> int\n\
2073\n\
2074Return the number of occurrences of substring sub in string\n\
2075S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002076interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077
2078static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002079string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002081 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082 int len = PyString_GET_SIZE(self), n;
2083 int i = 0, last = INT_MAX;
2084 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086
Guido van Rossumc6821402000-05-08 14:08:05 +00002087 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2088 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002090
Guido van Rossum4c08d552000-03-10 22:55:18 +00002091 if (PyString_Check(subobj)) {
2092 sub = PyString_AS_STRING(subobj);
2093 n = PyString_GET_SIZE(subobj);
2094 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002095#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002096 else if (PyUnicode_Check(subobj)) {
2097 int count;
2098 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2099 if (count == -1)
2100 return NULL;
2101 else
2102 return PyInt_FromLong((long) count);
2103 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002104#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002105 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2106 return NULL;
2107
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002108 string_adjust_indices(&i, &last, len);
2109
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 m = last + 1 - n;
2111 if (n == 0)
2112 return PyInt_FromLong((long) (m-i));
2113
2114 r = 0;
2115 while (i < m) {
2116 if (!memcmp(s+i, sub, n)) {
2117 r++;
2118 i += n;
2119 } else {
2120 i++;
2121 }
2122 }
2123 return PyInt_FromLong((long) r);
2124}
2125
2126
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002127PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128"S.swapcase() -> string\n\
2129\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002130Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002131converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132
2133static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002134string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135{
2136 char *s = PyString_AS_STRING(self), *s_new;
2137 int i, n = PyString_GET_SIZE(self);
2138 PyObject *new;
2139
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140 new = PyString_FromStringAndSize(NULL, n);
2141 if (new == NULL)
2142 return NULL;
2143 s_new = PyString_AsString(new);
2144 for (i = 0; i < n; i++) {
2145 int c = Py_CHARMASK(*s++);
2146 if (islower(c)) {
2147 *s_new = toupper(c);
2148 }
2149 else if (isupper(c)) {
2150 *s_new = tolower(c);
2151 }
2152 else
2153 *s_new = c;
2154 s_new++;
2155 }
2156 return new;
2157}
2158
2159
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002160PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161"S.translate(table [,deletechars]) -> string\n\
2162\n\
2163Return a copy of the string S, where all characters occurring\n\
2164in the optional argument deletechars are removed, and the\n\
2165remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002166translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167
2168static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002169string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 register char *input, *output;
2172 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173 register int i, c, changed = 0;
2174 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176 int inlen, tablen, dellen = 0;
2177 PyObject *result;
2178 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002181 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002182 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184
2185 if (PyString_Check(tableobj)) {
2186 table1 = PyString_AS_STRING(tableobj);
2187 tablen = PyString_GET_SIZE(tableobj);
2188 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002189#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002191 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002192 parameter; instead a mapping to None will cause characters
2193 to be deleted. */
2194 if (delobj != NULL) {
2195 PyErr_SetString(PyExc_TypeError,
2196 "deletions are implemented differently for unicode");
2197 return NULL;
2198 }
2199 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2200 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002201#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204
Martin v. Löwis00b61272002-12-12 20:03:19 +00002205 if (tablen != 256) {
2206 PyErr_SetString(PyExc_ValueError,
2207 "translation table must be 256 characters long");
2208 return NULL;
2209 }
2210
Guido van Rossum4c08d552000-03-10 22:55:18 +00002211 if (delobj != NULL) {
2212 if (PyString_Check(delobj)) {
2213 del_table = PyString_AS_STRING(delobj);
2214 dellen = PyString_GET_SIZE(delobj);
2215 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002216#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002217 else if (PyUnicode_Check(delobj)) {
2218 PyErr_SetString(PyExc_TypeError,
2219 "deletions are implemented differently for unicode");
2220 return NULL;
2221 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002222#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002223 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2224 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002225 }
2226 else {
2227 del_table = NULL;
2228 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229 }
2230
2231 table = table1;
2232 inlen = PyString_Size(input_obj);
2233 result = PyString_FromStringAndSize((char *)NULL, inlen);
2234 if (result == NULL)
2235 return NULL;
2236 output_start = output = PyString_AsString(result);
2237 input = PyString_AsString(input_obj);
2238
2239 if (dellen == 0) {
2240 /* If no deletions are required, use faster code */
2241 for (i = inlen; --i >= 0; ) {
2242 c = Py_CHARMASK(*input++);
2243 if (Py_CHARMASK((*output++ = table[c])) != c)
2244 changed = 1;
2245 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002246 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 return result;
2248 Py_DECREF(result);
2249 Py_INCREF(input_obj);
2250 return input_obj;
2251 }
2252
2253 for (i = 0; i < 256; i++)
2254 trans_table[i] = Py_CHARMASK(table[i]);
2255
2256 for (i = 0; i < dellen; i++)
2257 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2258
2259 for (i = inlen; --i >= 0; ) {
2260 c = Py_CHARMASK(*input++);
2261 if (trans_table[c] != -1)
2262 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2263 continue;
2264 changed = 1;
2265 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002266 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267 Py_DECREF(result);
2268 Py_INCREF(input_obj);
2269 return input_obj;
2270 }
2271 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002272 if (inlen > 0)
2273 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274 return result;
2275}
2276
2277
2278/* What follows is used for implementing replace(). Perry Stoll. */
2279
2280/*
2281 mymemfind
2282
2283 strstr replacement for arbitrary blocks of memory.
2284
Barry Warsaw51ac5802000-03-20 16:36:48 +00002285 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002286 contents of memory pointed to by PAT. Returns the index into MEM if
2287 found, or -1 if not found. If len of PAT is greater than length of
2288 MEM, the function returns -1.
2289*/
/* Scan mem[0..len) for the first occurrence of pat[0..pat_len) and
   return its offset, or -1 if there is none (including when pat_len
   exceeds len). */
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* pattern can not start within the last pat_len-1 bytes */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* cheap first-byte test before the full comparison */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
2305
2306/*
2307 mymemcnt
2308
2309 Return the number of distinct times PAT is found in MEM.
2310 meaning mem=1111 and pat==11 returns 2.
2311 mem=11111 and pat==11 also return 2.
2312 */
/* Count the non-overlapping occurrences of pat[0..pat_len) in
   mem[0..len), scanning left to right with mymemfind() and resuming
   right after each match. */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        const int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* skip the unmatched prefix and the match itself */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
2329
2330/*
2331 mymemreplace
2332
Thomas Wouters7e474022000-07-16 12:04:32 +00002333 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334 replaced with SUB.
2335
   If STR is empty, PAT and SUB are both empty, PAT is longer than STR, or
   there are no occurrences of PAT in STR to replace, then the original
   string is returned.  Otherwise, a new string is allocated here and
   returned.
2339
2340 on return, out_len is:
2341 the length of output string, or
2342 -1 if the input string is returned, or
2343 unchanged if an error occurs (no memory).
2344
2345 return value is:
2346 the new string allocated locally, or
2347 NULL if an error occurred.
2348*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    /* Replace up to `count` occurrences of pat in str by sub (all of them
       when count is negative).  An empty pat matches before every byte
       and at the end of str.

       Returns str itself with *out_len = -1 when nothing is replaced, a
       PyMem_MALLOC'ed buffer with *out_len set to its length on success,
       or NULL (with *out_len untouched) on error.  Errors are allocation
       failure and a result length that would not fit in an int. */
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
        goto return_same;
    if (count == 0)
        goto return_same;   /* nothing may be replaced; skip the scan */

    /* find length of output string */
    if (pat_len > 0)
        nfound = mymemcnt(str, len, pat, pat_len);
    else if (len == INT_MAX)
        return NULL;        /* len + 1 insertion points would overflow */
    else
        nfound = len + 1;
    if (count > 0 && nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only a positive delta can overflow: when
       the string shrinks, the nfound non-overlapping matches all fit
       inside len, so the result stays >= 0. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound * delta;

    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        if (pat_len > 0) {
            for (; nfound > 0; --nfound) {
                /* find index of next instance of pattern */
                offset = mymemfind(str, len, pat, pat_len);
                if (offset == -1)
                    break;

                /* copy non matching part of input string */
                memcpy(new_s, str, offset);
                str += offset + pat_len;
                len -= offset + pat_len;

                /* copy substitute into the output string */
                new_s += offset;
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
            }
            /* copy any remaining values into output string */
            if (len > 0)
                memcpy(new_s, str, len);
        }
        else {
            /* Empty pattern: emit sub before each input byte until the
               replacement budget is used up, then copy the rest. */
            for (;;++str, --len) {
                memcpy(new_s, sub, sub_len);
                new_s += sub_len;
                if (--nfound <= 0) {
                    memcpy(new_s, str, len);
                    break;
                }
                *new_s++ = *str;
            }
        }
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2427
2428
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002429PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002430"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431\n\
2432Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002433old replaced by new. If the optional argument count is\n\
2434given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002435
2436static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002437string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002438{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002439 const char *str = PyString_AS_STRING(self), *sub, *repl;
2440 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002441 const int len = PyString_GET_SIZE(self);
2442 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002443 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002445 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446
Guido van Rossum4c08d552000-03-10 22:55:18 +00002447 if (!PyArg_ParseTuple(args, "OO|i:replace",
2448 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002450
2451 if (PyString_Check(subobj)) {
2452 sub = PyString_AS_STRING(subobj);
2453 sub_len = PyString_GET_SIZE(subobj);
2454 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002455#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002456 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002457 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002458 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002459#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002460 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2461 return NULL;
2462
2463 if (PyString_Check(replobj)) {
2464 repl = PyString_AS_STRING(replobj);
2465 repl_len = PyString_GET_SIZE(replobj);
2466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002467#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002468 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002469 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002470 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002471#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002472 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2473 return NULL;
2474
Guido van Rossum4c08d552000-03-10 22:55:18 +00002475 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476 if (new_s == NULL) {
2477 PyErr_NoMemory();
2478 return NULL;
2479 }
2480 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002481 if (PyString_CheckExact(self)) {
2482 /* we're returning another reference to self */
2483 new = (PyObject*)self;
2484 Py_INCREF(new);
2485 }
2486 else {
2487 new = PyString_FromStringAndSize(str, len);
2488 if (new == NULL)
2489 return NULL;
2490 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002491 }
2492 else {
2493 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002494 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002495 }
2496 return new;
2497}
2498
2499
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002500PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002501"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002502\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002503Return True if S starts with the specified prefix, False otherwise.\n\
2504With optional start, test S beginning at that position.\n\
2505With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506
2507static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002508string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002509{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002510 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002511 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002512 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002513 int plen;
2514 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002515 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002516 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002517
Guido van Rossumc6821402000-05-08 14:08:05 +00002518 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2519 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520 return NULL;
2521 if (PyString_Check(subobj)) {
2522 prefix = PyString_AS_STRING(subobj);
2523 plen = PyString_GET_SIZE(subobj);
2524 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002525#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002526 else if (PyUnicode_Check(subobj)) {
2527 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002528 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002529 subobj, start, end, -1);
2530 if (rc == -1)
2531 return NULL;
2532 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002533 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002534 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002535#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002536 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537 return NULL;
2538
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002539 string_adjust_indices(&start, &end, len);
2540
2541 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002542 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002543
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002544 if (end-start >= plen)
2545 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2546 else
2547 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002548}
2549
2550
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002551PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002552"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002553\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002554Return True if S ends with the specified suffix, False otherwise.\n\
2555With optional start, test S beginning at that position.\n\
2556With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002557
2558static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002559string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002560{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002562 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002563 const char* suffix;
2564 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565 int start = 0;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002566 int end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002568
Guido van Rossumc6821402000-05-08 14:08:05 +00002569 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2570 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002571 return NULL;
2572 if (PyString_Check(subobj)) {
2573 suffix = PyString_AS_STRING(subobj);
2574 slen = PyString_GET_SIZE(subobj);
2575 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002576#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002577 else if (PyUnicode_Check(subobj)) {
2578 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002579 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002580 subobj, start, end, +1);
2581 if (rc == -1)
2582 return NULL;
2583 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002584 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002585 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002586#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002587 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002588 return NULL;
2589
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002590 string_adjust_indices(&start, &end, len);
2591
2592 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002593 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002594
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002595 if (end-slen > start)
2596 start = end - slen;
2597 if (end-start >= slen)
2598 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2599 else
2600 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002601}
2602
2603
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002604PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002605"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002606\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002607Encodes S using the codec registered for encoding. encoding defaults\n\
2608to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002609handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002610a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2611'xmlcharrefreplace' as well as any other name registered with\n\
2612codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002613
2614static PyObject *
2615string_encode(PyStringObject *self, PyObject *args)
2616{
2617 char *encoding = NULL;
2618 char *errors = NULL;
2619 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2620 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002621 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2622}
2623
2624
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002625PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002626"S.decode([encoding[,errors]]) -> object\n\
2627\n\
2628Decodes S using the codec registered for encoding. encoding defaults\n\
2629to the default encoding. errors may be given to set a different error\n\
2630handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002631a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2632as well as any other name registerd with codecs.register_error that is\n\
2633able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002634
2635static PyObject *
2636string_decode(PyStringObject *self, PyObject *args)
2637{
2638 char *encoding = NULL;
2639 char *errors = NULL;
2640 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2641 return NULL;
2642 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002643}
2644
2645
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002646PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002647"S.expandtabs([tabsize]) -> string\n\
2648\n\
2649Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002650If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002651
2652static PyObject*
2653string_expandtabs(PyStringObject *self, PyObject *args)
2654{
2655 const char *e, *p;
2656 char *q;
2657 int i, j;
2658 PyObject *u;
2659 int tabsize = 8;
2660
2661 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2662 return NULL;
2663
Thomas Wouters7e474022000-07-16 12:04:32 +00002664 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002665 i = j = 0;
2666 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2667 for (p = PyString_AS_STRING(self); p < e; p++)
2668 if (*p == '\t') {
2669 if (tabsize > 0)
2670 j += tabsize - (j % tabsize);
2671 }
2672 else {
2673 j++;
2674 if (*p == '\n' || *p == '\r') {
2675 i += j;
2676 j = 0;
2677 }
2678 }
2679
2680 /* Second pass: create output string and fill it */
2681 u = PyString_FromStringAndSize(NULL, i + j);
2682 if (!u)
2683 return NULL;
2684
2685 j = 0;
2686 q = PyString_AS_STRING(u);
2687
2688 for (p = PyString_AS_STRING(self); p < e; p++)
2689 if (*p == '\t') {
2690 if (tabsize > 0) {
2691 i = tabsize - (j % tabsize);
2692 j += i;
2693 while (i--)
2694 *q++ = ' ';
2695 }
2696 }
2697 else {
2698 j++;
2699 *q++ = *p;
2700 if (*p == '\n' || *p == '\r')
2701 j = 0;
2702 }
2703
2704 return u;
2705}
2706
Tim Peters8fa5dd02001-09-12 02:18:30 +00002707static PyObject *
2708pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002709{
2710 PyObject *u;
2711
2712 if (left < 0)
2713 left = 0;
2714 if (right < 0)
2715 right = 0;
2716
Tim Peters8fa5dd02001-09-12 02:18:30 +00002717 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002718 Py_INCREF(self);
2719 return (PyObject *)self;
2720 }
2721
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002722 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002723 left + PyString_GET_SIZE(self) + right);
2724 if (u) {
2725 if (left)
2726 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002727 memcpy(PyString_AS_STRING(u) + left,
2728 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002729 PyString_GET_SIZE(self));
2730 if (right)
2731 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2732 fill, right);
2733 }
2734
2735 return u;
2736}
2737
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002738PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002739"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002740"\n"
2741"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002742"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002743
2744static PyObject *
2745string_ljust(PyStringObject *self, PyObject *args)
2746{
2747 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002748 char fillchar = ' ';
2749
2750 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002751 return NULL;
2752
Tim Peters8fa5dd02001-09-12 02:18:30 +00002753 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002754 Py_INCREF(self);
2755 return (PyObject*) self;
2756 }
2757
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002758 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002759}
2760
2761
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002762PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002763"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002764"\n"
2765"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002766"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002767
2768static PyObject *
2769string_rjust(PyStringObject *self, PyObject *args)
2770{
2771 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002772 char fillchar = ' ';
2773
2774 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002775 return NULL;
2776
Tim Peters8fa5dd02001-09-12 02:18:30 +00002777 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002778 Py_INCREF(self);
2779 return (PyObject*) self;
2780 }
2781
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002782 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002783}
2784
2785
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002786PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002787"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002788"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002789"Return S centered in a string of length width. Padding is\n"
2790"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002791
2792static PyObject *
2793string_center(PyStringObject *self, PyObject *args)
2794{
2795 int marg, left;
2796 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002797 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002798
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002799 if (!PyArg_ParseTuple(args, "i|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002800 return NULL;
2801
Tim Peters8fa5dd02001-09-12 02:18:30 +00002802 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002803 Py_INCREF(self);
2804 return (PyObject*) self;
2805 }
2806
2807 marg = width - PyString_GET_SIZE(self);
2808 left = marg / 2 + (marg & width & 1);
2809
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002810 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002811}
2812
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002813PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002814"S.zfill(width) -> string\n"
2815"\n"
2816"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002817"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002818
2819static PyObject *
2820string_zfill(PyStringObject *self, PyObject *args)
2821{
2822 int fill;
2823 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002824 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002825
2826 int width;
2827 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2828 return NULL;
2829
2830 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002831 if (PyString_CheckExact(self)) {
2832 Py_INCREF(self);
2833 return (PyObject*) self;
2834 }
2835 else
2836 return PyString_FromStringAndSize(
2837 PyString_AS_STRING(self),
2838 PyString_GET_SIZE(self)
2839 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002840 }
2841
2842 fill = width - PyString_GET_SIZE(self);
2843
2844 s = pad(self, fill, 0, '0');
2845
2846 if (s == NULL)
2847 return NULL;
2848
2849 p = PyString_AS_STRING(s);
2850 if (p[fill] == '+' || p[fill] == '-') {
2851 /* move sign to beginning of string */
2852 p[0] = p[fill];
2853 p[fill] = '0';
2854 }
2855
2856 return (PyObject*) s;
2857}
2858
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002859PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002860"S.isspace() -> bool\n\
2861\n\
2862Return True if all characters in S are whitespace\n\
2863and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002864
2865static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002866string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002867{
Fred Drakeba096332000-07-09 07:04:36 +00002868 register const unsigned char *p
2869 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002870 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871
Guido van Rossum4c08d552000-03-10 22:55:18 +00002872 /* Shortcut for single character strings */
2873 if (PyString_GET_SIZE(self) == 1 &&
2874 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002875 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002877 /* Special case for empty strings */
2878 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002879 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002880
Guido van Rossum4c08d552000-03-10 22:55:18 +00002881 e = p + PyString_GET_SIZE(self);
2882 for (; p < e; p++) {
2883 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002884 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002886 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002887}
2888
2889
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002890PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002891"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002892\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002893Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002894and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002895
2896static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002897string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002898{
Fred Drakeba096332000-07-09 07:04:36 +00002899 register const unsigned char *p
2900 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002901 register const unsigned char *e;
2902
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002903 /* Shortcut for single character strings */
2904 if (PyString_GET_SIZE(self) == 1 &&
2905 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002906 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002907
2908 /* Special case for empty strings */
2909 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002910 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002911
2912 e = p + PyString_GET_SIZE(self);
2913 for (; p < e; p++) {
2914 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002915 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002916 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002917 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002918}
2919
2920
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002921PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002922"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002923\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002924Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002925and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002926
2927static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002928string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002929{
Fred Drakeba096332000-07-09 07:04:36 +00002930 register const unsigned char *p
2931 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002932 register const unsigned char *e;
2933
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002934 /* Shortcut for single character strings */
2935 if (PyString_GET_SIZE(self) == 1 &&
2936 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002937 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002938
2939 /* Special case for empty strings */
2940 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002941 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002942
2943 e = p + PyString_GET_SIZE(self);
2944 for (; p < e; p++) {
2945 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002946 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002947 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002948 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002949}
2950
2951
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002952PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002953"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002954\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002955Return True if all characters in S are digits\n\
2956and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002957
2958static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002959string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002960{
Fred Drakeba096332000-07-09 07:04:36 +00002961 register const unsigned char *p
2962 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002963 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002964
Guido van Rossum4c08d552000-03-10 22:55:18 +00002965 /* Shortcut for single character strings */
2966 if (PyString_GET_SIZE(self) == 1 &&
2967 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002968 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002969
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002970 /* Special case for empty strings */
2971 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002972 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002973
Guido van Rossum4c08d552000-03-10 22:55:18 +00002974 e = p + PyString_GET_SIZE(self);
2975 for (; p < e; p++) {
2976 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002977 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002978 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002979 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002980}
2981
2982
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002983PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002984"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002985\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002986Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002987at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002988
2989static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002990string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002991{
Fred Drakeba096332000-07-09 07:04:36 +00002992 register const unsigned char *p
2993 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002994 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002995 int cased;
2996
Guido van Rossum4c08d552000-03-10 22:55:18 +00002997 /* Shortcut for single character strings */
2998 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002999 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003000
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003001 /* Special case for empty strings */
3002 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003003 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003004
Guido van Rossum4c08d552000-03-10 22:55:18 +00003005 e = p + PyString_GET_SIZE(self);
3006 cased = 0;
3007 for (; p < e; p++) {
3008 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003009 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003010 else if (!cased && islower(*p))
3011 cased = 1;
3012 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003013 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003014}
3015
3016
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003017PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003018"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003019\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003020Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003021at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003022
3023static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003024string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003025{
Fred Drakeba096332000-07-09 07:04:36 +00003026 register const unsigned char *p
3027 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003028 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003029 int cased;
3030
Guido van Rossum4c08d552000-03-10 22:55:18 +00003031 /* Shortcut for single character strings */
3032 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003033 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003034
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003035 /* Special case for empty strings */
3036 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003037 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003038
Guido van Rossum4c08d552000-03-10 22:55:18 +00003039 e = p + PyString_GET_SIZE(self);
3040 cased = 0;
3041 for (; p < e; p++) {
3042 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003043 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003044 else if (!cased && isupper(*p))
3045 cased = 1;
3046 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003047 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003048}
3049
3050
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003051PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003052"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003053\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003054Return True if S is a titlecased string and there is at least one\n\
3055character in S, i.e. uppercase characters may only follow uncased\n\
3056characters and lowercase characters only cased ones. Return False\n\
3057otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003058
3059static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003060string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003061{
Fred Drakeba096332000-07-09 07:04:36 +00003062 register const unsigned char *p
3063 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003064 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003065 int cased, previous_is_cased;
3066
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 /* Shortcut for single character strings */
3068 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003069 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003071 /* Special case for empty strings */
3072 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003073 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003074
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075 e = p + PyString_GET_SIZE(self);
3076 cased = 0;
3077 previous_is_cased = 0;
3078 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003079 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080
3081 if (isupper(ch)) {
3082 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003083 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003084 previous_is_cased = 1;
3085 cased = 1;
3086 }
3087 else if (islower(ch)) {
3088 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003089 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090 previous_is_cased = 1;
3091 cased = 1;
3092 }
3093 else
3094 previous_is_cased = 0;
3095 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003096 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003097}
3098
3099
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003100PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003101"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003102\n\
3103Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003104Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003105is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003106
3107#define SPLIT_APPEND(data, left, right) \
3108 str = PyString_FromStringAndSize(data + left, right - left); \
3109 if (!str) \
3110 goto onError; \
3111 if (PyList_Append(list, str)) { \
3112 Py_DECREF(str); \
3113 goto onError; \
3114 } \
3115 else \
3116 Py_DECREF(str);
3117
3118static PyObject*
3119string_splitlines(PyStringObject *self, PyObject *args)
3120{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003121 register int i;
3122 register int j;
3123 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003124 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003125 PyObject *list;
3126 PyObject *str;
3127 char *data;
3128
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003129 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003130 return NULL;
3131
3132 data = PyString_AS_STRING(self);
3133 len = PyString_GET_SIZE(self);
3134
Guido van Rossum4c08d552000-03-10 22:55:18 +00003135 list = PyList_New(0);
3136 if (!list)
3137 goto onError;
3138
3139 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003140 int eol;
3141
Guido van Rossum4c08d552000-03-10 22:55:18 +00003142 /* Find a line and append it */
3143 while (i < len && data[i] != '\n' && data[i] != '\r')
3144 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145
3146 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003147 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003148 if (i < len) {
3149 if (data[i] == '\r' && i + 1 < len &&
3150 data[i+1] == '\n')
3151 i += 2;
3152 else
3153 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003154 if (keepends)
3155 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003156 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003157 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003158 j = i;
3159 }
3160 if (j < len) {
3161 SPLIT_APPEND(data, j, len);
3162 }
3163
3164 return list;
3165
3166 onError:
3167 Py_DECREF(list);
3168 return NULL;
3169}
3170
3171#undef SPLIT_APPEND
3172
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003173static PyObject *
3174string_getnewargs(PyStringObject *v)
3175{
3176 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3177}
3178
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003179
/* Method table for the str type (installed as tp_methods of
   PyString_Type).  Each entry is {name, C function, calling-convention
   flag, docstring}.  The table ends with a {NULL, NULL} sentinel. */
static PyMethodDef
string_methods[] = {
	/* Counterparts of the obsolete stropmodule functions; except
	   string.maketrans(). */
	{"join", (PyCFunction)string_join, METH_O, join__doc__},
	{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
	{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
	{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
	{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
	/* Character-class predicates; none of them takes arguments. */
	{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
	{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
	{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
	{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
	{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
	{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
	{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
	{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
	 capitalize__doc__},
	{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
	{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
	 endswith__doc__},
	{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
	{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
	{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
	{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
	{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
	{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
	{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
	{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
	 startswith__doc__},
	{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
	{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
	 swapcase__doc__},
	{"translate", (PyCFunction)string_translate, METH_VARARGS,
	 translate__doc__},
	{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
	{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
	{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
	{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
	{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
	{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
	{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
	{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
	 expandtabs__doc__},
	{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
	 splitlines__doc__},
	/* No docstring is supplied for this entry. */
	{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
	{NULL, NULL} /* sentinel */
};
3229
Jeremy Hylton938ace62002-07-17 16:30:39 +00003230static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003231str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3232
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003233static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003234string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003235{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003236 PyObject *x = NULL;
3237 static char *kwlist[] = {"object", 0};
3238
Guido van Rossumae960af2001-08-30 03:11:59 +00003239 if (type != &PyString_Type)
3240 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003241 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3242 return NULL;
3243 if (x == NULL)
3244 return PyString_FromString("");
3245 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003246}
3247
Guido van Rossumae960af2001-08-30 03:11:59 +00003248static PyObject *
3249str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3250{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003251 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003252 int n;
3253
3254 assert(PyType_IsSubtype(type, &PyString_Type));
3255 tmp = string_new(&PyString_Type, args, kwds);
3256 if (tmp == NULL)
3257 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003258 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003259 n = PyString_GET_SIZE(tmp);
3260 pnew = type->tp_alloc(type, n);
3261 if (pnew != NULL) {
3262 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003263 ((PyStringObject *)pnew)->ob_shash =
3264 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003265 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003266 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003267 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003268 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003269}
3270
Guido van Rossumcacfc072002-05-24 19:01:59 +00003271static PyObject *
3272basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3273{
3274 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003275 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003276 return NULL;
3277}
3278
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003279static PyObject *
3280string_mod(PyObject *v, PyObject *w)
3281{
3282 if (!PyString_Check(v)) {
3283 Py_INCREF(Py_NotImplemented);
3284 return Py_NotImplemented;
3285 }
3286 return PyString_Format(v, w);
3287}
3288
/* Docstring for the basestring type (used as tp_doc of PyBaseString_Type). */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003291
/* Number protocol table for str.  Only the remainder slot is filled in, so
   that `str % args` reaches string_mod(); the slots not listed here are
   left zero-initialized. */
static PyNumberMethods string_as_number = {
	0,			/*nb_add*/
	0,			/*nb_subtract*/
	0,			/*nb_multiply*/
	0, 			/*nb_divide*/
	string_mod,		/*nb_remainder*/
};
3299
3300
/* Type object for basestring.  It derives from object, may itself be
   subclassed (Py_TPFLAGS_BASETYPE), and its tp_new always raises, so the
   type cannot be instantiated directly. */
PyTypeObject PyBaseString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"basestring",				/* tp_name */
	0,					/* tp_basicsize */
	0,					/* tp_itemsize */
	0,			 		/* tp_dealloc */
	0,			 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	0,		 			/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,		 			/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	0,					/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
	basestring_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	0,					/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	0,					/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseObject_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	basestring_new,				/* tp_new */
	0,		                	/* tp_free */
};
3343
/* Docstring for the str type (used as tp_doc below). */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  It derives from basestring, is subclassable, and
   wires in the number (for %), sequence, mapping and buffer protocol tables
   together with the method table string_methods. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"str",					/* tp_name */
	sizeof(PyStringObject),			/* tp_basicsize */
	sizeof(char),				/* tp_itemsize */
 	(destructor)string_dealloc, 		/* tp_dealloc */
	(printfunc)string_print, 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	(reprfunc)string_repr, 			/* tp_repr */
	&string_as_number,			/* tp_as_number */
	&string_as_sequence,			/* tp_as_sequence */
	&string_as_mapping,			/* tp_as_mapping */
	(hashfunc)string_hash, 			/* tp_hash */
	0,					/* tp_call */
	(reprfunc)string_str,			/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	&string_as_buffer,			/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
		Py_TPFLAGS_BASETYPE,		/* tp_flags */
	string_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	(richcmpfunc)string_richcompare,	/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	string_methods,				/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseString_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	string_new,				/* tp_new */
	PyObject_Del,	                	/* tp_free */
};
3393
3394void
Fred Drakeba096332000-07-09 07:04:36 +00003395PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003396{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003397 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003398 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003399 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003400 if (w == NULL || !PyString_Check(*pv)) {
3401 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003402 *pv = NULL;
3403 return;
3404 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003405 v = string_concat((PyStringObject *) *pv, w);
3406 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003407 *pv = v;
3408}
3409
Guido van Rossum013142a1994-08-30 08:19:36 +00003410void
Fred Drakeba096332000-07-09 07:04:36 +00003411PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003412{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003413 PyString_Concat(pv, w);
3414 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003415}
3416
3417
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003418/* The following function breaks the notion that strings are immutable:
3419 it changes the size of a string. We get away with this only if there
3420 is only one module referencing the object. You can also think of it
3421 as creating a new string object and destroying the old one, only
3422 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003423 already be known to some other part of the code...
3424 Note that if there's not enough memory to resize the string, the original
3425 string object at *pv is deallocated, *pv is set to NULL, an "out of
3426 memory" exception is set, and -1 is returned. Else (on success) 0 is
3427 returned, and the value in *pv may or may not be the same as on input.
3428 As always, an extra byte is allocated for a trailing \0 byte (newsize
3429 does *not* include that), and a trailing \0 byte is stored.
3430*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003431
3432int
Fred Drakeba096332000-07-09 07:04:36 +00003433_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003434{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003435 register PyObject *v;
3436 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003437 v = *pv;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00003438 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003439 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003440 Py_DECREF(v);
3441 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003442 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003443 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003444 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003445 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003446 _Py_ForgetReference(v);
3447 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003448 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003449 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003450 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003451 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003452 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003453 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003454 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003455 _Py_NewReference(*pv);
3456 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003457 sv->ob_size = newsize;
3458 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003459 return 0;
3460}
Guido van Rossume5372401993-03-16 12:15:04 +00003461
3462/* Helpers for formatstring */
3463
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003464static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003465getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003466{
3467 int argidx = *p_argidx;
3468 if (argidx < arglen) {
3469 (*p_argidx)++;
3470 if (arglen < 0)
3471 return args;
3472 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003473 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003474 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003475 PyErr_SetString(PyExc_TypeError,
3476 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003477 return NULL;
3478}
3479
/* Format codes: bit flags, one per printf-style flag character, so that
 * several can be combined in a single `flags` int.
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
3492
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003493static int
Fred Drakeba096332000-07-09 07:04:36 +00003494formatfloat(char *buf, size_t buflen, int flags,
3495 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003496{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003497 /* fmt = '%#.' + `prec` + `type`
3498 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003499 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003500 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003501 x = PyFloat_AsDouble(v);
3502 if (x == -1.0 && PyErr_Occurred()) {
3503 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003504 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003505 }
Guido van Rossume5372401993-03-16 12:15:04 +00003506 if (prec < 0)
3507 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003508 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3509 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003510 /* Worst case length calc to ensure no buffer overrun:
3511
3512 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003513 fmt = %#.<prec>g
3514 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003515 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003516 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003517
3518 'f' formats:
3519 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3520 len = 1 + 50 + 1 + prec = 52 + prec
3521
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003522 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003523 always given), therefore increase the length by one.
3524
3525 */
3526 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3527 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003528 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003529 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003530 return -1;
3531 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003532 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3533 (flags&F_ALT) ? "#" : "",
3534 prec, type);
Tim Peters885d4572001-11-28 20:27:42 +00003535 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003536 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003537}
3538
Tim Peters38fd5b62000-09-21 05:43:11 +00003539/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3540 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3541 * Python's regular ints.
3542 * Return value: a new PyString*, or NULL if error.
3543 * . *pbuf is set to point into it,
3544 * *plen set to the # of chars following that.
3545 * Caller must decref it when done using pbuf.
3546 * The string starting at *pbuf is of the form
3547 * "-"? ("0x" | "0X")? digit+
3548 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003549 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003550 * There will be at least prec digits, zero-filled on the left if
3551 * necessary to get that many.
3552 * val object to be converted
3553 * flags bitmask of format flags; only F_ALT is looked at
3554 * prec minimum number of digits; 0-fill on left if needed
3555 * type a character in [duoxX]; u acts the same as d
3556 *
3557 * CAUTION: o, x and X conversions on regular ints can never
3558 * produce a '-' sign, but can for Python's unbounded ints.
3559 */
3560PyObject*
3561_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3562 char **pbuf, int *plen)
3563{
3564 PyObject *result = NULL;
3565 char *buf;
3566 int i;
3567 int sign; /* 1 if '-', else 0 */
3568 int len; /* number of characters */
3569 int numdigits; /* len == numnondigits + numdigits */
3570 int numnondigits = 0;
3571
3572 switch (type) {
3573 case 'd':
3574 case 'u':
3575 result = val->ob_type->tp_str(val);
3576 break;
3577 case 'o':
3578 result = val->ob_type->tp_as_number->nb_oct(val);
3579 break;
3580 case 'x':
3581 case 'X':
3582 numnondigits = 2;
3583 result = val->ob_type->tp_as_number->nb_hex(val);
3584 break;
3585 default:
3586 assert(!"'type' not in [duoxX]");
3587 }
3588 if (!result)
3589 return NULL;
3590
3591 /* To modify the string in-place, there can only be one reference. */
3592 if (result->ob_refcnt != 1) {
3593 PyErr_BadInternalCall();
3594 return NULL;
3595 }
3596 buf = PyString_AsString(result);
3597 len = PyString_Size(result);
3598 if (buf[len-1] == 'L') {
3599 --len;
3600 buf[len] = '\0';
3601 }
3602 sign = buf[0] == '-';
3603 numnondigits += sign;
3604 numdigits = len - numnondigits;
3605 assert(numdigits > 0);
3606
Tim Petersfff53252001-04-12 18:38:48 +00003607 /* Get rid of base marker unless F_ALT */
3608 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003609 /* Need to skip 0x, 0X or 0. */
3610 int skipped = 0;
3611 switch (type) {
3612 case 'o':
3613 assert(buf[sign] == '0');
3614 /* If 0 is only digit, leave it alone. */
3615 if (numdigits > 1) {
3616 skipped = 1;
3617 --numdigits;
3618 }
3619 break;
3620 case 'x':
3621 case 'X':
3622 assert(buf[sign] == '0');
3623 assert(buf[sign + 1] == 'x');
3624 skipped = 2;
3625 numnondigits -= 2;
3626 break;
3627 }
3628 if (skipped) {
3629 buf += skipped;
3630 len -= skipped;
3631 if (sign)
3632 buf[0] = '-';
3633 }
3634 assert(len == numnondigits + numdigits);
3635 assert(numdigits > 0);
3636 }
3637
3638 /* Fill with leading zeroes to meet minimum width. */
3639 if (prec > numdigits) {
3640 PyObject *r1 = PyString_FromStringAndSize(NULL,
3641 numnondigits + prec);
3642 char *b1;
3643 if (!r1) {
3644 Py_DECREF(result);
3645 return NULL;
3646 }
3647 b1 = PyString_AS_STRING(r1);
3648 for (i = 0; i < numnondigits; ++i)
3649 *b1++ = *buf++;
3650 for (i = 0; i < prec - numdigits; i++)
3651 *b1++ = '0';
3652 for (i = 0; i < numdigits; i++)
3653 *b1++ = *buf++;
3654 *b1 = '\0';
3655 Py_DECREF(result);
3656 result = r1;
3657 buf = PyString_AS_STRING(result);
3658 len = numnondigits + prec;
3659 }
3660
3661 /* Fix up case for hex conversions. */
3662 switch (type) {
3663 case 'x':
3664 /* Need to convert all upper case letters to lower case. */
3665 for (i = 0; i < len; i++)
3666 if (buf[i] >= 'A' && buf[i] <= 'F')
3667 buf[i] += 'a'-'A';
3668 break;
3669 case 'X':
3670 /* Need to convert 0x to 0X (and -0x to -0X). */
3671 if (buf[sign + 1] == 'x')
3672 buf[sign + 1] = 'X';
3673 break;
3674 }
3675 *pbuf = buf;
3676 *plen = len;
3677 return result;
3678}
3679
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003680static int
Fred Drakeba096332000-07-09 07:04:36 +00003681formatint(char *buf, size_t buflen, int flags,
3682 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003683{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003684 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003685 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3686 + 1 + 1 = 24 */
3687 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003688 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003689 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003690
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003691 x = PyInt_AsLong(v);
3692 if (x == -1 && PyErr_Occurred()) {
3693 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003694 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003695 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003696 if (x < 0 && type == 'u') {
3697 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003698 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003699 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3700 sign = "-";
3701 else
3702 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003703 if (prec < 0)
3704 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003705
3706 if ((flags & F_ALT) &&
3707 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003708 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003709 * of issues that cause pain:
3710 * - when 0 is being converted, the C standard leaves off
3711 * the '0x' or '0X', which is inconsistent with other
3712 * %#x/%#X conversions and inconsistent with Python's
3713 * hex() function
3714 * - there are platforms that violate the standard and
3715 * convert 0 with the '0x' or '0X'
3716 * (Metrowerks, Compaq Tru64)
3717 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003718 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003719 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003720 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003721 * We can achieve the desired consistency by inserting our
3722 * own '0x' or '0X' prefix, and substituting %x/%X in place
3723 * of %#x/%#X.
3724 *
3725 * Note that this is the same approach as used in
3726 * formatint() in unicodeobject.c
3727 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003728 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3729 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003730 }
3731 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003732 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3733 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003734 prec, type);
3735 }
3736
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003737 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3738 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003739 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003740 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003741 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003742 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003743 return -1;
3744 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003745 if (sign[0])
3746 PyOS_snprintf(buf, buflen, fmt, -x);
3747 else
3748 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003749 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003750}
3751
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003752static int
Fred Drakeba096332000-07-09 07:04:36 +00003753formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003754{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003755 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003756 if (PyString_Check(v)) {
3757 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003758 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003759 }
3760 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003761 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003762 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003763 }
3764 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003765 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003766}
3767
Guido van Rossum013142a1994-08-30 08:19:36 +00003768
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand in any expression (e.g. `sizeof FORMATBUFLEN').
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003778
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003779PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003780PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003781{
3782 char *fmt, *res;
3783 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003784 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003785 PyObject *result, *orig_args;
3786#ifdef Py_USING_UNICODE
3787 PyObject *v, *w;
3788#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003789 PyObject *dict = NULL;
3790 if (format == NULL || !PyString_Check(format) || args == NULL) {
3791 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003792 return NULL;
3793 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003794 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003795 fmt = PyString_AS_STRING(format);
3796 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003797 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003798 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003799 if (result == NULL)
3800 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003801 res = PyString_AsString(result);
3802 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003803 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003804 argidx = 0;
3805 }
3806 else {
3807 arglen = -1;
3808 argidx = -2;
3809 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003810 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3811 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003812 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003813 while (--fmtcnt >= 0) {
3814 if (*fmt != '%') {
3815 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003816 rescnt = fmtcnt + 100;
3817 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003818 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003819 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003820 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003821 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003822 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003823 }
3824 *res++ = *fmt++;
3825 }
3826 else {
3827 /* Got a format specifier */
3828 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003829 int width = -1;
3830 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003831 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003832 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003833 PyObject *v = NULL;
3834 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003835 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003836 int sign;
3837 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003838 char formatbuf[FORMATBUFLEN];
3839 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003840#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003841 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003842 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003843#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003844
Guido van Rossumda9c2711996-12-05 21:58:58 +00003845 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003846 if (*fmt == '(') {
3847 char *keystart;
3848 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003849 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003850 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003851
3852 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003853 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003854 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003855 goto error;
3856 }
3857 ++fmt;
3858 --fmtcnt;
3859 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003860 /* Skip over balanced parentheses */
3861 while (pcount > 0 && --fmtcnt >= 0) {
3862 if (*fmt == ')')
3863 --pcount;
3864 else if (*fmt == '(')
3865 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003866 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003867 }
3868 keylen = fmt - keystart - 1;
3869 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003870 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003871 "incomplete format key");
3872 goto error;
3873 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003874 key = PyString_FromStringAndSize(keystart,
3875 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003876 if (key == NULL)
3877 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003878 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003879 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003880 args_owned = 0;
3881 }
3882 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003883 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003884 if (args == NULL) {
3885 goto error;
3886 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003887 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003888 arglen = -1;
3889 argidx = -2;
3890 }
Guido van Rossume5372401993-03-16 12:15:04 +00003891 while (--fmtcnt >= 0) {
3892 switch (c = *fmt++) {
3893 case '-': flags |= F_LJUST; continue;
3894 case '+': flags |= F_SIGN; continue;
3895 case ' ': flags |= F_BLANK; continue;
3896 case '#': flags |= F_ALT; continue;
3897 case '0': flags |= F_ZERO; continue;
3898 }
3899 break;
3900 }
3901 if (c == '*') {
3902 v = getnextarg(args, arglen, &argidx);
3903 if (v == NULL)
3904 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003905 if (!PyInt_Check(v)) {
3906 PyErr_SetString(PyExc_TypeError,
3907 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003908 goto error;
3909 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003910 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003911 if (width < 0) {
3912 flags |= F_LJUST;
3913 width = -width;
3914 }
Guido van Rossume5372401993-03-16 12:15:04 +00003915 if (--fmtcnt >= 0)
3916 c = *fmt++;
3917 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003918 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003919 width = c - '0';
3920 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003921 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003922 if (!isdigit(c))
3923 break;
3924 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003925 PyErr_SetString(
3926 PyExc_ValueError,
3927 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003928 goto error;
3929 }
3930 width = width*10 + (c - '0');
3931 }
3932 }
3933 if (c == '.') {
3934 prec = 0;
3935 if (--fmtcnt >= 0)
3936 c = *fmt++;
3937 if (c == '*') {
3938 v = getnextarg(args, arglen, &argidx);
3939 if (v == NULL)
3940 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003941 if (!PyInt_Check(v)) {
3942 PyErr_SetString(
3943 PyExc_TypeError,
3944 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003945 goto error;
3946 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003947 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003948 if (prec < 0)
3949 prec = 0;
3950 if (--fmtcnt >= 0)
3951 c = *fmt++;
3952 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003953 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003954 prec = c - '0';
3955 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003956 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003957 if (!isdigit(c))
3958 break;
3959 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003960 PyErr_SetString(
3961 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003962 "prec too big");
3963 goto error;
3964 }
3965 prec = prec*10 + (c - '0');
3966 }
3967 }
3968 } /* prec */
3969 if (fmtcnt >= 0) {
3970 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003971 if (--fmtcnt >= 0)
3972 c = *fmt++;
3973 }
3974 }
3975 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003976 PyErr_SetString(PyExc_ValueError,
3977 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003978 goto error;
3979 }
3980 if (c != '%') {
3981 v = getnextarg(args, arglen, &argidx);
3982 if (v == NULL)
3983 goto error;
3984 }
3985 sign = 0;
3986 fill = ' ';
3987 switch (c) {
3988 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003989 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003990 len = 1;
3991 break;
3992 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003993#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003994 if (PyUnicode_Check(v)) {
3995 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003996 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003997 goto unicode;
3998 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003999#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004000 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004001 case 'r':
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004002 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00004003 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004004 else
4005 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004006 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004007 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004008 if (!PyString_Check(temp)) {
Guido van Rossum8052f892002-10-09 19:14:30 +00004009 /* XXX Note: this should never happen,
4010 since PyObject_Repr() and
4011 PyObject_Str() assure this */
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004012 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004013 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004014 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004015 goto error;
4016 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004017 pbuf = PyString_AS_STRING(temp);
4018 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004019 if (prec >= 0 && len > prec)
4020 len = prec;
4021 break;
4022 case 'i':
4023 case 'd':
4024 case 'u':
4025 case 'o':
4026 case 'x':
4027 case 'X':
4028 if (c == 'i')
4029 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004030 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004031 temp = _PyString_FormatLong(v, flags,
4032 prec, c, &pbuf, &len);
4033 if (!temp)
4034 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004035 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004036 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004037 else {
4038 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004039 len = formatint(pbuf,
4040 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004041 flags, prec, c, v);
4042 if (len < 0)
4043 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004044 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004045 }
4046 if (flags & F_ZERO)
4047 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004048 break;
4049 case 'e':
4050 case 'E':
4051 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004052 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004053 case 'g':
4054 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004055 if (c == 'F')
4056 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004057 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004058 len = formatfloat(pbuf, sizeof(formatbuf),
4059 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004060 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004061 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004062 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004063 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004064 fill = '0';
4065 break;
4066 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004067#ifdef Py_USING_UNICODE
4068 if (PyUnicode_Check(v)) {
4069 fmt = fmt_start;
4070 argidx = argidx_start;
4071 goto unicode;
4072 }
4073#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004074 pbuf = formatbuf;
4075 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004076 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004077 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004078 break;
4079 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004080 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004081 "unsupported format character '%c' (0x%x) "
4082 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004083 c, c,
4084 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004085 goto error;
4086 }
4087 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004088 if (*pbuf == '-' || *pbuf == '+') {
4089 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004090 len--;
4091 }
4092 else if (flags & F_SIGN)
4093 sign = '+';
4094 else if (flags & F_BLANK)
4095 sign = ' ';
4096 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004097 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004098 }
4099 if (width < len)
4100 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004101 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004102 reslen -= rescnt;
4103 rescnt = width + fmtcnt + 100;
4104 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004105 if (reslen < 0) {
4106 Py_DECREF(result);
4107 return PyErr_NoMemory();
4108 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004109 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004110 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004111 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004112 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004113 }
4114 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004115 if (fill != ' ')
4116 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004117 rescnt--;
4118 if (width > len)
4119 width--;
4120 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004121 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4122 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004123 assert(pbuf[1] == c);
4124 if (fill != ' ') {
4125 *res++ = *pbuf++;
4126 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004127 }
Tim Petersfff53252001-04-12 18:38:48 +00004128 rescnt -= 2;
4129 width -= 2;
4130 if (width < 0)
4131 width = 0;
4132 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004133 }
4134 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004135 do {
4136 --rescnt;
4137 *res++ = fill;
4138 } while (--width > len);
4139 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004140 if (fill == ' ') {
4141 if (sign)
4142 *res++ = sign;
4143 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004144 (c == 'x' || c == 'X')) {
4145 assert(pbuf[0] == '0');
4146 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004147 *res++ = *pbuf++;
4148 *res++ = *pbuf++;
4149 }
4150 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004151 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004152 res += len;
4153 rescnt -= len;
4154 while (--width >= len) {
4155 --rescnt;
4156 *res++ = ' ';
4157 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004158 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004159 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004160 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004161 goto error;
4162 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004163 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004164 } /* '%' */
4165 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004166 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004167 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004168 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004169 goto error;
4170 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004171 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004172 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004173 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004174 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004175 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004176
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004177#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004178 unicode:
4179 if (args_owned) {
4180 Py_DECREF(args);
4181 args_owned = 0;
4182 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004183 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004184 if (PyTuple_Check(orig_args) && argidx > 0) {
4185 PyObject *v;
4186 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4187 v = PyTuple_New(n);
4188 if (v == NULL)
4189 goto error;
4190 while (--n >= 0) {
4191 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4192 Py_INCREF(w);
4193 PyTuple_SET_ITEM(v, n, w);
4194 }
4195 args = v;
4196 } else {
4197 Py_INCREF(orig_args);
4198 args = orig_args;
4199 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004200 args_owned = 1;
4201 /* Take what we have of the result and let the Unicode formatting
4202 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004203 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004204 if (_PyString_Resize(&result, rescnt))
4205 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004206 fmtcnt = PyString_GET_SIZE(format) - \
4207 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004208 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4209 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004210 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004211 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004212 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004213 if (v == NULL)
4214 goto error;
4215 /* Paste what we have (result) to what the Unicode formatting
4216 function returned (v) and return the result (or error) */
4217 w = PyUnicode_Concat(result, v);
4218 Py_DECREF(result);
4219 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004220 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004221 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004222#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004223
Guido van Rossume5372401993-03-16 12:15:04 +00004224 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004225 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004226 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004227 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004228 }
Guido van Rossume5372401993-03-16 12:15:04 +00004229 return NULL;
4230}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004231
Guido van Rossum2a61e741997-01-18 07:55:05 +00004232void
Fred Drakeba096332000-07-09 07:04:36 +00004233PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004234{
4235 register PyStringObject *s = (PyStringObject *)(*p);
4236 PyObject *t;
4237 if (s == NULL || !PyString_Check(s))
4238 Py_FatalError("PyString_InternInPlace: strings only please!");
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004239 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004240 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004241 if (interned == NULL) {
4242 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004243 if (interned == NULL) {
4244 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004245 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004246 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004247 }
4248 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
4249 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004250 Py_DECREF(*p);
4251 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004252 return;
4253 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004254 /* Ensure that only true string objects appear in the intern dict */
4255 if (!PyString_CheckExact(s)) {
Tim Peters111f6092001-09-12 07:54:51 +00004256 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
4257 PyString_GET_SIZE(s));
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004258 if (t == NULL) {
4259 PyErr_Clear();
4260 return;
Tim Peters111f6092001-09-12 07:54:51 +00004261 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004262 } else {
4263 t = (PyObject*) s;
4264 Py_INCREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004265 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004266
4267 if (PyDict_SetItem(interned, t, t) == 0) {
4268 /* The two references in interned are not counted by
4269 refcnt. The string deallocator will take care of this */
4270 ((PyObject *)t)->ob_refcnt-=2;
4271 PyString_CHECK_INTERNED(t) = SSTATE_INTERNED_MORTAL;
4272 Py_DECREF(*p);
4273 *p = t;
4274 return;
4275 }
4276 Py_DECREF(t);
Guido van Rossum2a61e741997-01-18 07:55:05 +00004277 PyErr_Clear();
4278}
4279
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004280void
4281PyString_InternImmortal(PyObject **p)
4282{
4283 PyString_InternInPlace(p);
4284 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4285 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4286 Py_INCREF(*p);
4287 }
4288}
4289
Guido van Rossum2a61e741997-01-18 07:55:05 +00004290
4291PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004292PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004293{
4294 PyObject *s = PyString_FromString(cp);
4295 if (s == NULL)
4296 return NULL;
4297 PyString_InternInPlace(&s);
4298 return s;
4299}
4300
Guido van Rossum8cf04761997-08-02 02:57:45 +00004301void
Fred Drakeba096332000-07-09 07:04:36 +00004302PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004303{
4304 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004305 for (i = 0; i < UCHAR_MAX + 1; i++) {
4306 Py_XDECREF(characters[i]);
4307 characters[i] = NULL;
4308 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004309 Py_XDECREF(nullstring);
4310 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004311}
Barry Warsawa903ad982001-02-23 16:40:48 +00004312
Barry Warsawa903ad982001-02-23 16:40:48 +00004313void _Py_ReleaseInternedStrings(void)
4314{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004315 PyObject *keys;
4316 PyStringObject *s;
4317 int i, n;
4318
4319 if (interned == NULL || !PyDict_Check(interned))
4320 return;
4321 keys = PyDict_Keys(interned);
4322 if (keys == NULL || !PyList_Check(keys)) {
4323 PyErr_Clear();
4324 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004325 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004326
4327 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4328 detector, interned strings are not forcibly deallocated; rather, we
4329 give them their stolen references back, and then clear and DECREF
4330 the interned dict. */
4331
4332 fprintf(stderr, "releasing interned strings\n");
4333 n = PyList_GET_SIZE(keys);
4334 for (i = 0; i < n; i++) {
4335 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4336 switch (s->ob_sstate) {
4337 case SSTATE_NOT_INTERNED:
4338 /* XXX Shouldn't happen */
4339 break;
4340 case SSTATE_INTERNED_IMMORTAL:
4341 s->ob_refcnt += 1;
4342 break;
4343 case SSTATE_INTERNED_MORTAL:
4344 s->ob_refcnt += 2;
4345 break;
4346 default:
4347 Py_FatalError("Inconsistent interned string state.");
4348 }
4349 s->ob_sstate = SSTATE_NOT_INTERNED;
4350 }
4351 Py_DECREF(keys);
4352 PyDict_Clear(interned);
4353 Py_DECREF(interned);
4354 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004355}