blob: a8e1cb6900c576e0153a3b6cfd5e0402b678c0f4 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000025/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000028 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000029
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
   For PyString_FromStringAndSize(), the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000043
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000044 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000050*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000157 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000184
Barry Warsawdadace02001-08-24 18:32:06 +0000185 switch (*f) {
186 case 'c':
187 (void)va_arg(count, int);
188 /* fall through... */
189 case '%':
190 n++;
191 break;
192 case 'd': case 'i': case 'x':
193 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000194 /* 20 bytes is enough to hold a 64-bit
195 integer. Decimal takes the most space.
196 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000197 n += 20;
198 break;
199 case 's':
200 s = va_arg(count, char*);
201 n += strlen(s);
202 break;
203 case 'p':
204 (void) va_arg(count, int);
205 /* maximum 64-bit pointer representation:
206 * 0xffffffffffffffff
207 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000208 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000209 */
210 n += 19;
211 break;
212 default:
213 /* if we stumble upon an unknown
214 formatting code, copy the rest of
215 the format string to the output
216 string. (we cannot just skip the
217 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000218 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 n += strlen(p);
220 goto expand;
221 }
222 } else
223 n++;
224 }
225 expand:
226 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000227 /* Since we've analyzed how much space we need for the worst case,
228 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000229 string = PyString_FromStringAndSize(NULL, n);
230 if (!string)
231 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000232
Barry Warsawdadace02001-08-24 18:32:06 +0000233 s = PyString_AsString(string);
234
235 for (f = format; *f; f++) {
236 if (*f == '%') {
237 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000238 Py_ssize_t i;
239 int longflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000240 /* parse the width.precision part (we're only
241 interested in the precision value, if any) */
242 n = 0;
243 while (isdigit(Py_CHARMASK(*f)))
244 n = (n*10) + *f++ - '0';
245 if (*f == '.') {
246 f++;
247 n = 0;
248 while (isdigit(Py_CHARMASK(*f)))
249 n = (n*10) + *f++ - '0';
250 }
251 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
252 f++;
253 /* handle the long flag, but only for %ld. others
254 can be added when necessary. */
255 if (*f == 'l' && *(f+1) == 'd') {
256 longflag = 1;
257 ++f;
258 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000259
Barry Warsawdadace02001-08-24 18:32:06 +0000260 switch (*f) {
261 case 'c':
262 *s++ = va_arg(vargs, int);
263 break;
264 case 'd':
265 if (longflag)
266 sprintf(s, "%ld", va_arg(vargs, long));
267 else
268 sprintf(s, "%d", va_arg(vargs, int));
269 s += strlen(s);
270 break;
271 case 'i':
272 sprintf(s, "%i", va_arg(vargs, int));
273 s += strlen(s);
274 break;
275 case 'x':
276 sprintf(s, "%x", va_arg(vargs, int));
277 s += strlen(s);
278 break;
279 case 's':
280 p = va_arg(vargs, char*);
281 i = strlen(p);
282 if (n > 0 && i > n)
283 i = n;
284 memcpy(s, p, i);
285 s += i;
286 break;
287 case 'p':
288 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000289 /* %p is ill-defined: ensure leading 0x. */
290 if (s[1] == 'X')
291 s[1] = 'x';
292 else if (s[1] != 'x') {
293 memmove(s+2, s, strlen(s)+1);
294 s[0] = '0';
295 s[1] = 'x';
296 }
Barry Warsawdadace02001-08-24 18:32:06 +0000297 s += strlen(s);
298 break;
299 case '%':
300 *s++ = '%';
301 break;
302 default:
303 strcpy(s, p);
304 s += strlen(s);
305 goto end;
306 }
307 } else
308 *s++ = *f;
309 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000310
Barry Warsawdadace02001-08-24 18:32:06 +0000311 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000312 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000313 return string;
314}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000315
Barry Warsawdadace02001-08-24 18:32:06 +0000316PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000317PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000318{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000319 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000320 va_list vargs;
321
322#ifdef HAVE_STDARG_PROTOTYPES
323 va_start(vargs, format);
324#else
325 va_start(vargs);
326#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000327 ret = PyString_FromFormatV(format, vargs);
328 va_end(vargs);
329 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000330}
331
332
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000333PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000334 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000335 const char *encoding,
336 const char *errors)
337{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000338 PyObject *v, *str;
339
340 str = PyString_FromStringAndSize(s, size);
341 if (str == NULL)
342 return NULL;
343 v = PyString_AsDecodedString(str, encoding, errors);
344 Py_DECREF(str);
345 return v;
346}
347
348PyObject *PyString_AsDecodedObject(PyObject *str,
349 const char *encoding,
350 const char *errors)
351{
352 PyObject *v;
353
354 if (!PyString_Check(str)) {
355 PyErr_BadArgument();
356 goto onError;
357 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000358
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000359 if (encoding == NULL) {
360#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000361 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000362#else
363 PyErr_SetString(PyExc_ValueError, "no encoding specified");
364 goto onError;
365#endif
366 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000367
368 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000369 v = PyCodec_Decode(str, encoding, errors);
370 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000371 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000372
373 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000374
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000375 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000376 return NULL;
377}
378
379PyObject *PyString_AsDecodedString(PyObject *str,
380 const char *encoding,
381 const char *errors)
382{
383 PyObject *v;
384
385 v = PyString_AsDecodedObject(str, encoding, errors);
386 if (v == NULL)
387 goto onError;
388
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000389#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000390 /* Convert Unicode to a string using the default encoding */
391 if (PyUnicode_Check(v)) {
392 PyObject *temp = v;
393 v = PyUnicode_AsEncodedString(v, NULL, NULL);
394 Py_DECREF(temp);
395 if (v == NULL)
396 goto onError;
397 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000398#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399 if (!PyString_Check(v)) {
400 PyErr_Format(PyExc_TypeError,
401 "decoder did not return a string object (type=%.400s)",
402 v->ob_type->tp_name);
403 Py_DECREF(v);
404 goto onError;
405 }
406
407 return v;
408
409 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000410 return NULL;
411}
412
413PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000414 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000415 const char *encoding,
416 const char *errors)
417{
418 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000419
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000420 str = PyString_FromStringAndSize(s, size);
421 if (str == NULL)
422 return NULL;
423 v = PyString_AsEncodedString(str, encoding, errors);
424 Py_DECREF(str);
425 return v;
426}
427
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000428PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000429 const char *encoding,
430 const char *errors)
431{
432 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000433
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000434 if (!PyString_Check(str)) {
435 PyErr_BadArgument();
436 goto onError;
437 }
438
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000439 if (encoding == NULL) {
440#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000442#else
443 PyErr_SetString(PyExc_ValueError, "no encoding specified");
444 goto onError;
445#endif
446 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000447
448 /* Encode via the codec registry */
449 v = PyCodec_Encode(str, encoding, errors);
450 if (v == NULL)
451 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000452
453 return v;
454
455 onError:
456 return NULL;
457}
458
459PyObject *PyString_AsEncodedString(PyObject *str,
460 const char *encoding,
461 const char *errors)
462{
463 PyObject *v;
464
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000465 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000466 if (v == NULL)
467 goto onError;
468
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000469#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 /* Convert Unicode to a string using the default encoding */
471 if (PyUnicode_Check(v)) {
472 PyObject *temp = v;
473 v = PyUnicode_AsEncodedString(v, NULL, NULL);
474 Py_DECREF(temp);
475 if (v == NULL)
476 goto onError;
477 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000478#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000479 if (!PyString_Check(v)) {
480 PyErr_Format(PyExc_TypeError,
481 "encoder did not return a string object (type=%.400s)",
482 v->ob_type->tp_name);
483 Py_DECREF(v);
484 goto onError;
485 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000486
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000487 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000488
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000489 onError:
490 return NULL;
491}
492
Guido van Rossum234f9421993-06-17 12:35:49 +0000493static void
Fred Drakeba096332000-07-09 07:04:36 +0000494string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000495{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000496 switch (PyString_CHECK_INTERNED(op)) {
497 case SSTATE_NOT_INTERNED:
498 break;
499
500 case SSTATE_INTERNED_MORTAL:
501 /* revive dead object temporarily for DelItem */
502 op->ob_refcnt = 3;
503 if (PyDict_DelItem(interned, op) != 0)
504 Py_FatalError(
505 "deletion of interned string failed");
506 break;
507
508 case SSTATE_INTERNED_IMMORTAL:
509 Py_FatalError("Immortal interned string died.");
510
511 default:
512 Py_FatalError("Inconsistent interned string state.");
513 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000514 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000515}
516
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000517/* Unescape a backslash-escaped string. If unicode is non-zero,
518 the string is a u-literal. If recode_encoding is non-zero,
519 the string is UTF-8 encoded and should be re-encoded in the
520 specified encoding. */
521
522PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000523 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000524 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000525 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000526 const char *recode_encoding)
527{
528 int c;
529 char *p, *buf;
530 const char *end;
531 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000532 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000533 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000534 if (v == NULL)
535 return NULL;
536 p = buf = PyString_AsString(v);
537 end = s + len;
538 while (s < end) {
539 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000540 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000541#ifdef Py_USING_UNICODE
542 if (recode_encoding && (*s & 0x80)) {
543 PyObject *u, *w;
544 char *r;
545 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 t = s;
548 /* Decode non-ASCII bytes as UTF-8. */
549 while (t < end && (*t & 0x80)) t++;
550 u = PyUnicode_DecodeUTF8(s, t - s, errors);
551 if(!u) goto failed;
552
553 /* Recode them in target encoding. */
554 w = PyUnicode_AsEncodedString(
555 u, recode_encoding, errors);
556 Py_DECREF(u);
557 if (!w) goto failed;
558
559 /* Append bytes to output buffer. */
560 r = PyString_AsString(w);
561 rn = PyString_Size(w);
562 memcpy(p, r, rn);
563 p += rn;
564 Py_DECREF(w);
565 s = t;
566 } else {
567 *p++ = *s++;
568 }
569#else
570 *p++ = *s++;
571#endif
572 continue;
573 }
574 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000575 if (s==end) {
576 PyErr_SetString(PyExc_ValueError,
577 "Trailing \\ in string");
578 goto failed;
579 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000580 switch (*s++) {
581 /* XXX This assumes ASCII! */
582 case '\n': break;
583 case '\\': *p++ = '\\'; break;
584 case '\'': *p++ = '\''; break;
585 case '\"': *p++ = '\"'; break;
586 case 'b': *p++ = '\b'; break;
587 case 'f': *p++ = '\014'; break; /* FF */
588 case 't': *p++ = '\t'; break;
589 case 'n': *p++ = '\n'; break;
590 case 'r': *p++ = '\r'; break;
591 case 'v': *p++ = '\013'; break; /* VT */
592 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
593 case '0': case '1': case '2': case '3':
594 case '4': case '5': case '6': case '7':
595 c = s[-1] - '0';
596 if ('0' <= *s && *s <= '7') {
597 c = (c<<3) + *s++ - '0';
598 if ('0' <= *s && *s <= '7')
599 c = (c<<3) + *s++ - '0';
600 }
601 *p++ = c;
602 break;
603 case 'x':
604 if (isxdigit(Py_CHARMASK(s[0]))
605 && isxdigit(Py_CHARMASK(s[1]))) {
606 unsigned int x = 0;
607 c = Py_CHARMASK(*s);
608 s++;
609 if (isdigit(c))
610 x = c - '0';
611 else if (islower(c))
612 x = 10 + c - 'a';
613 else
614 x = 10 + c - 'A';
615 x = x << 4;
616 c = Py_CHARMASK(*s);
617 s++;
618 if (isdigit(c))
619 x += c - '0';
620 else if (islower(c))
621 x += 10 + c - 'a';
622 else
623 x += 10 + c - 'A';
624 *p++ = x;
625 break;
626 }
627 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 PyErr_SetString(PyExc_ValueError,
629 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000630 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 }
632 if (strcmp(errors, "replace") == 0) {
633 *p++ = '?';
634 } else if (strcmp(errors, "ignore") == 0)
635 /* do nothing */;
636 else {
637 PyErr_Format(PyExc_ValueError,
638 "decoding error; "
639 "unknown error handling code: %.400s",
640 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000641 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000642 }
643#ifndef Py_USING_UNICODE
644 case 'u':
645 case 'U':
646 case 'N':
647 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000648 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000649 "Unicode escapes not legal "
650 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000651 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 }
653#endif
654 default:
655 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000656 s--;
657 goto non_esc; /* an arbitry number of unescaped
658 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000659 }
660 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000661 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000662 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 return v;
664 failed:
665 Py_DECREF(v);
666 return NULL;
667}
668
Martin v. Löwis18e16552006-02-15 17:27:45 +0000669static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000670string_getsize(register PyObject *op)
671{
672 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000673 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000674 if (PyString_AsStringAndSize(op, &s, &len))
675 return -1;
676 return len;
677}
678
679static /*const*/ char *
680string_getbuffer(register PyObject *op)
681{
682 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000683 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000684 if (PyString_AsStringAndSize(op, &s, &len))
685 return NULL;
686 return s;
687}
688
Martin v. Löwis18e16552006-02-15 17:27:45 +0000689Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000690PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000691{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000692 if (!PyString_Check(op))
693 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000694 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695}
696
697/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000698PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000699{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (!PyString_Check(op))
701 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000702 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703}
704
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705int
706PyString_AsStringAndSize(register PyObject *obj,
707 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000708 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000709{
710 if (s == NULL) {
711 PyErr_BadInternalCall();
712 return -1;
713 }
714
715 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000716#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000717 if (PyUnicode_Check(obj)) {
718 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
719 if (obj == NULL)
720 return -1;
721 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000722 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000723#endif
724 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000725 PyErr_Format(PyExc_TypeError,
726 "expected string or Unicode object, "
727 "%.200s found", obj->ob_type->tp_name);
728 return -1;
729 }
730 }
731
732 *s = PyString_AS_STRING(obj);
733 if (len != NULL)
734 *len = PyString_GET_SIZE(obj);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000735 else if (strlen(*s) != PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000736 PyErr_SetString(PyExc_TypeError,
737 "expected string without null bytes");
738 return -1;
739 }
740 return 0;
741}
742
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000743/* Methods */
744
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000745static int
Fred Drakeba096332000-07-09 07:04:36 +0000746string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000748 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000749 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000750 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000751
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000752 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000753 if (! PyString_CheckExact(op)) {
754 int ret;
755 /* A str subclass may have its own __str__ method. */
756 op = (PyStringObject *) PyObject_Str((PyObject *)op);
757 if (op == NULL)
758 return -1;
759 ret = string_print(op, fp, flags);
760 Py_DECREF(op);
761 return ret;
762 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000763 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000764#ifdef __VMS
765 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
766#else
767 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
768#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000769 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771
Thomas Wouters7e474022000-07-16 12:04:32 +0000772 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000774 if (memchr(op->ob_sval, '\'', op->ob_size) &&
775 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000776 quote = '"';
777
778 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000779 for (i = 0; i < op->ob_size; i++) {
780 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000781 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000782 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000783 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000784 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000785 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000786 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000787 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000788 fprintf(fp, "\\r");
789 else if (c < ' ' || c >= 0x7f)
790 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000791 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000792 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000796}
797
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000798PyObject *
799PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000801 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000802 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000803 PyObject *v;
804 if (newsize > INT_MAX) {
805 PyErr_SetString(PyExc_OverflowError,
806 "string is too large to make repr");
807 }
808 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000810 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000811 }
812 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000813 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814 register char c;
815 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 int quote;
817
Thomas Wouters7e474022000-07-16 12:04:32 +0000818 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000820 if (smartquotes &&
821 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000822 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000823 quote = '"';
824
Tim Peters9161c8b2001-12-03 01:55:38 +0000825 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000828 /* There's at least enough room for a hex escape
829 and a closing quote. */
830 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000831 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000833 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000834 else if (c == '\t')
835 *p++ = '\\', *p++ = 't';
836 else if (c == '\n')
837 *p++ = '\\', *p++ = 'n';
838 else if (c == '\r')
839 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000840 else if (c < ' ' || c >= 0x7f) {
841 /* For performance, we don't want to call
842 PyOS_snprintf here (extra layers of
843 function call). */
844 sprintf(p, "\\x%02x", c & 0xff);
845 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 else
848 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000849 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000850 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000851 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000853 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000854 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000855 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857}
858
Guido van Rossum189f1df2001-05-01 16:51:53 +0000859static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000860string_repr(PyObject *op)
861{
862 return PyString_Repr(op, 1);
863}
864
865static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000866string_str(PyObject *s)
867{
Tim Petersc9933152001-10-16 20:18:24 +0000868 assert(PyString_Check(s));
869 if (PyString_CheckExact(s)) {
870 Py_INCREF(s);
871 return s;
872 }
873 else {
874 /* Subtype -- return genuine string with the same value. */
875 PyStringObject *t = (PyStringObject *) s;
876 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
877 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000878}
879
Martin v. Löwis18e16552006-02-15 17:27:45 +0000880static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000881string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882{
883 return a->ob_size;
884}
885
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000886static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000887string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000889 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000890 register PyStringObject *op;
891 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000892#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000893 if (PyUnicode_Check(bb))
894 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000895#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000896 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000897 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000898 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 return NULL;
900 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000901#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000902 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000903 if ((a->ob_size == 0 || b->ob_size == 0) &&
904 PyString_CheckExact(a) && PyString_CheckExact(b)) {
905 if (a->ob_size == 0) {
906 Py_INCREF(bb);
907 return bb;
908 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000909 Py_INCREF(a);
910 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
912 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000913 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000914 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000915 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000916 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000917 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000918 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000919 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000920 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000921 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
922 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000923 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925#undef b
926}
927
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000928static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000929string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000930{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000931 register Py_ssize_t i;
932 register Py_ssize_t j;
933 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000934 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000935 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000936 if (n < 0)
937 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000938 /* watch out for overflows: the size can overflow int,
939 * and the # of bytes needed can overflow size_t
940 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000941 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000942 if (n && size / n != a->ob_size) {
943 PyErr_SetString(PyExc_OverflowError,
944 "repeated string is too long");
945 return NULL;
946 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000947 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000948 Py_INCREF(a);
949 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950 }
Tim Peterse7c05322004-06-27 17:24:49 +0000951 nbytes = (size_t)size;
952 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000953 PyErr_SetString(PyExc_OverflowError,
954 "repeated string is too long");
955 return NULL;
956 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000958 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000959 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000960 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000961 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000962 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000963 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000964 op->ob_sval[size] = '\0';
965 if (a->ob_size == 1 && n > 0) {
966 memset(op->ob_sval, a->ob_sval[0] , n);
967 return (PyObject *) op;
968 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000969 i = 0;
970 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000971 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
972 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000973 }
974 while (i < size) {
975 j = (i <= size-i) ? i : size-i;
976 memcpy(op->ob_sval+i, op->ob_sval, j);
977 i += j;
978 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000979 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000980}
981
982/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
983
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000984static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000985string_slice(register PyStringObject *a, register Py_ssize_t i,
986 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +0000987 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000988{
989 if (i < 0)
990 i = 0;
991 if (j < 0)
992 j = 0; /* Avoid signed/unsigned bug in next line */
993 if (j > a->ob_size)
994 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000995 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
996 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997 Py_INCREF(a);
998 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000999 }
1000 if (j < i)
1001 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001002 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003}
1004
Guido van Rossum9284a572000-03-07 15:53:43 +00001005static int
Fred Drakeba096332000-07-09 07:04:36 +00001006string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001007{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001008 char *s = PyString_AS_STRING(a);
1009 const char *sub = PyString_AS_STRING(el);
1010 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001011 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001012 int shortsub;
1013 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001014
1015 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001016#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001017 if (PyUnicode_Check(el))
1018 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001019#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001020 if (!PyString_Check(el)) {
1021 PyErr_SetString(PyExc_TypeError,
1022 "'in <string>' requires string as left operand");
1023 return -1;
1024 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001025 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001026
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001027 if (len_sub == 0)
1028 return 1;
1029 /* last points to one char beyond the start of the rightmost
1030 substring. When s<last, there is still room for a possible match
1031 and s[0] through s[len_sub-1] will be in bounds.
1032 shortsub is len_sub minus the last character which is checked
1033 separately just before the memcmp(). That check helps prevent
1034 false starts and saves the setup time for memcmp().
1035 */
1036 firstchar = sub[0];
1037 shortsub = len_sub - 1;
1038 lastchar = sub[shortsub];
1039 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1040 while (s < last) {
1041 s = memchr(s, firstchar, last-s);
1042 if (s == NULL)
1043 return 0;
1044 assert(s < last);
1045 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001046 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001047 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001048 }
1049 return 0;
1050}
1051
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001052static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001053string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001054{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001056 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001057 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001058 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059 return NULL;
1060 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001061 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001062 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001063 if (v == NULL)
1064 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001065 else {
1066#ifdef COUNT_ALLOCS
1067 one_strings++;
1068#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001069 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001070 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001071 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001072}
1073
Martin v. Löwiscd353062001-05-24 16:56:35 +00001074static PyObject*
1075string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001076{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001077 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001078 Py_ssize_t len_a, len_b;
1079 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001080 PyObject *result;
1081
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001082 /* Make sure both arguments are strings. */
1083 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001084 result = Py_NotImplemented;
1085 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001086 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001087 if (a == b) {
1088 switch (op) {
1089 case Py_EQ:case Py_LE:case Py_GE:
1090 result = Py_True;
1091 goto out;
1092 case Py_NE:case Py_LT:case Py_GT:
1093 result = Py_False;
1094 goto out;
1095 }
1096 }
1097 if (op == Py_EQ) {
1098 /* Supporting Py_NE here as well does not save
1099 much time, since Py_NE is rarely used. */
1100 if (a->ob_size == b->ob_size
1101 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001102 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001103 a->ob_size) == 0)) {
1104 result = Py_True;
1105 } else {
1106 result = Py_False;
1107 }
1108 goto out;
1109 }
1110 len_a = a->ob_size; len_b = b->ob_size;
1111 min_len = (len_a < len_b) ? len_a : len_b;
1112 if (min_len > 0) {
1113 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1114 if (c==0)
1115 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1116 }else
1117 c = 0;
1118 if (c == 0)
1119 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1120 switch (op) {
1121 case Py_LT: c = c < 0; break;
1122 case Py_LE: c = c <= 0; break;
1123 case Py_EQ: assert(0); break; /* unreachable */
1124 case Py_NE: c = c != 0; break;
1125 case Py_GT: c = c > 0; break;
1126 case Py_GE: c = c >= 0; break;
1127 default:
1128 result = Py_NotImplemented;
1129 goto out;
1130 }
1131 result = c ? Py_True : Py_False;
1132 out:
1133 Py_INCREF(result);
1134 return result;
1135}
1136
1137int
1138_PyString_Eq(PyObject *o1, PyObject *o2)
1139{
1140 PyStringObject *a, *b;
1141 a = (PyStringObject*)o1;
1142 b = (PyStringObject*)o2;
1143 return a->ob_size == b->ob_size
1144 && *a->ob_sval == *b->ob_sval
1145 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001146}
1147
Guido van Rossum9bfef441993-03-29 10:43:31 +00001148static long
Fred Drakeba096332000-07-09 07:04:36 +00001149string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001150{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001151 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001152 register unsigned char *p;
1153 register long x;
1154
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001155 if (a->ob_shash != -1)
1156 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001157 len = a->ob_size;
1158 p = (unsigned char *) a->ob_sval;
1159 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001160 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001161 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001162 x ^= a->ob_size;
1163 if (x == -1)
1164 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001165 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001166 return x;
1167}
1168
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001169static PyObject*
1170string_subscript(PyStringObject* self, PyObject* item)
1171{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001172 if (PyInt_Check(item) || PyLong_Check(item)) {
1173 Py_ssize_t i = PyInt_AsSsize_t(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001174 if (i == -1 && PyErr_Occurred())
1175 return NULL;
1176 if (i < 0)
1177 i += PyString_GET_SIZE(self);
1178 return string_item(self,i);
1179 }
1180 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001181 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001182 char* source_buf;
1183 char* result_buf;
1184 PyObject* result;
1185
1186 if (PySlice_GetIndicesEx((PySliceObject*)item,
1187 PyString_GET_SIZE(self),
1188 &start, &stop, &step, &slicelength) < 0) {
1189 return NULL;
1190 }
1191
1192 if (slicelength <= 0) {
1193 return PyString_FromStringAndSize("", 0);
1194 }
1195 else {
1196 source_buf = PyString_AsString((PyObject*)self);
1197 result_buf = PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001198 if (result_buf == NULL)
1199 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001200
1201 for (cur = start, i = 0; i < slicelength;
1202 cur += step, i++) {
1203 result_buf[i] = source_buf[cur];
1204 }
1205
1206 result = PyString_FromStringAndSize(result_buf,
1207 slicelength);
1208 PyMem_Free(result_buf);
1209 return result;
1210 }
1211 }
1212 else {
1213 PyErr_SetString(PyExc_TypeError,
1214 "string indices must be integers");
1215 return NULL;
1216 }
1217}
1218
Martin v. Löwis18e16552006-02-15 17:27:45 +00001219static Py_ssize_t
1220string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001221{
1222 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001223 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001224 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001225 return -1;
1226 }
1227 *ptr = (void *)self->ob_sval;
1228 return self->ob_size;
1229}
1230
Martin v. Löwis18e16552006-02-15 17:27:45 +00001231static Py_ssize_t
1232string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001233{
Guido van Rossum045e6881997-09-08 18:30:11 +00001234 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001235 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236 return -1;
1237}
1238
Martin v. Löwis18e16552006-02-15 17:27:45 +00001239static Py_ssize_t
1240string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001241{
1242 if ( lenp )
1243 *lenp = self->ob_size;
1244 return 1;
1245}
1246
Martin v. Löwis18e16552006-02-15 17:27:45 +00001247static Py_ssize_t
1248string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001249{
1250 if ( index != 0 ) {
1251 PyErr_SetString(PyExc_SystemError,
1252 "accessing non-existent string segment");
1253 return -1;
1254 }
1255 *ptr = self->ob_sval;
1256 return self->ob_size;
1257}
1258
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001259static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001260 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001261 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001262 (ssizeargfunc)string_repeat, /*sq_repeat*/
1263 (ssizeargfunc)string_item, /*sq_item*/
1264 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001265 0, /*sq_ass_item*/
1266 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001267 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001268};
1269
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001270static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001271 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001272 (binaryfunc)string_subscript,
1273 0,
1274};
1275
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001276static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001277 (readbufferproc)string_buffer_getreadbuf,
1278 (writebufferproc)string_buffer_getwritebuf,
1279 (segcountproc)string_buffer_getsegcount,
1280 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001281};
1282
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001283
1284
/* Strip-kind selectors; each value is also an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",
	"|O:rstrip",
	"|O:strip"
};

/* The bare method name: the format string with its 3-character "|O:"
   prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001293
/* Helpers for the split functions.  Both macros build the substring
   data[left:right] and add it to a list, jumping to an error label on
   failure.  They are NOT self-contained: the expansion site must provide
   a `PyObject *str` scratch variable, a `PyObject *list` and an
   `onError:` label.  The expansion is a statement sequence (not a single
   statement), so use it only inside a braced block.

   SPLIT_APPEND adds the piece at the end of the list. */
#define SPLIT_APPEND(data, left, right)					\
	str = PyString_FromStringAndSize((data) + (left),		\
					 (right) - (left));		\
	if (str == NULL)						\
		goto onError;						\
	if (PyList_Append(list, str) != 0) {				\
		Py_DECREF(str);						\
		goto onError;						\
	}								\
	else								\
		Py_DECREF(str);

/* SPLIT_INSERT adds the piece at the front of the list (index 0). */
#define SPLIT_INSERT(data, left, right)					\
	str = PyString_FromStringAndSize((data) + (left),		\
					 (right) - (left));		\
	if (str == NULL)						\
		goto onError;						\
	if (PyList_Insert(list, 0, str) != 0) {				\
		Py_DECREF(str);						\
		goto onError;						\
	}								\
	else								\
		Py_DECREF(str);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317
1318static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001319split_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001321 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001322 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323 PyObject *list = PyList_New(0);
1324
1325 if (list == NULL)
1326 return NULL;
1327
Guido van Rossum4c08d552000-03-10 22:55:18 +00001328 for (i = j = 0; i < len; ) {
1329 while (i < len && isspace(Py_CHARMASK(s[i])))
1330 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001331 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 while (i < len && !isspace(Py_CHARMASK(s[i])))
1333 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 if (maxsplit-- <= 0)
1336 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001337 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001338 while (i < len && isspace(Py_CHARMASK(s[i])))
1339 i++;
1340 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341 }
1342 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001343 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001344 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001345 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001347 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348 Py_DECREF(list);
1349 return NULL;
1350}
1351
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001352static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001353split_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001354{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001355 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001356 PyObject *str;
1357 PyObject *list = PyList_New(0);
1358
1359 if (list == NULL)
1360 return NULL;
1361
1362 for (i = j = 0; i < len; ) {
1363 if (s[i] == ch) {
1364 if (maxcount-- <= 0)
1365 break;
1366 SPLIT_APPEND(s, j, i);
1367 i = j = i + 1;
1368 } else
1369 i++;
1370 }
1371 if (j <= len) {
1372 SPLIT_APPEND(s, j, len);
1373 }
1374 return list;
1375
1376 onError:
1377 Py_DECREF(list);
1378 return NULL;
1379}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001380
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001381PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382"S.split([sep [,maxsplit]]) -> list of strings\n\
1383\n\
1384Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001385delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001386splits are done. If sep is not specified or is None, any\n\
1387whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001388
1389static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001390string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001392 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1393 int err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001394 int maxsplit = -1;
1395 const char *s = PyString_AS_STRING(self), *sub;
1396 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001397
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400 if (maxsplit < 0)
1401 maxsplit = INT_MAX;
1402 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001404 if (PyString_Check(subobj)) {
1405 sub = PyString_AS_STRING(subobj);
1406 n = PyString_GET_SIZE(subobj);
1407 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001408#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001409 else if (PyUnicode_Check(subobj))
1410 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001411#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001412 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1413 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001414
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 if (n == 0) {
1416 PyErr_SetString(PyExc_ValueError, "empty separator");
1417 return NULL;
1418 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001419 else if (n == 1)
1420 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421
1422 list = PyList_New(0);
1423 if (list == NULL)
1424 return NULL;
1425
1426 i = j = 0;
1427 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001428 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001429 if (maxsplit-- <= 0)
1430 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001431 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432 if (item == NULL)
1433 goto fail;
1434 err = PyList_Append(list, item);
1435 Py_DECREF(item);
1436 if (err < 0)
1437 goto fail;
1438 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 }
1440 else
1441 i++;
1442 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001443 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001444 if (item == NULL)
1445 goto fail;
1446 err = PyList_Append(list, item);
1447 Py_DECREF(item);
1448 if (err < 0)
1449 goto fail;
1450
1451 return list;
1452
1453 fail:
1454 Py_DECREF(list);
1455 return NULL;
1456}
1457
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001458static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001459rsplit_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001460{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001461 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001462 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001463 PyObject *list = PyList_New(0);
1464
1465 if (list == NULL)
1466 return NULL;
1467
1468 for (i = j = len - 1; i >= 0; ) {
1469 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1470 i--;
1471 j = i;
1472 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1473 i--;
1474 if (j > i) {
1475 if (maxsplit-- <= 0)
1476 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001478 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1479 i--;
1480 j = i;
1481 }
1482 }
1483 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001484 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001485 }
1486 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001487 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001488 Py_DECREF(list);
1489 return NULL;
1490}
1491
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001492static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001493rsplit_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001494{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001495 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001496 PyObject *str;
1497 PyObject *list = PyList_New(0);
1498
1499 if (list == NULL)
1500 return NULL;
1501
1502 for (i = j = len - 1; i >= 0; ) {
1503 if (s[i] == ch) {
1504 if (maxcount-- <= 0)
1505 break;
1506 SPLIT_INSERT(s, i + 1, j + 1);
1507 j = i = i - 1;
1508 } else
1509 i--;
1510 }
1511 if (j >= -1) {
1512 SPLIT_INSERT(s, 0, j + 1);
1513 }
1514 return list;
1515
1516 onError:
1517 Py_DECREF(list);
1518 return NULL;
1519}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001520
1521PyDoc_STRVAR(rsplit__doc__,
1522"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1523\n\
1524Return a list of the words in the string S, using sep as the\n\
1525delimiter string, starting at the end of the string and working\n\
1526to the front. If maxsplit is given, at most maxsplit splits are\n\
1527done. If sep is not specified or is None, any whitespace string\n\
1528is a separator.");
1529
1530static PyObject *
1531string_rsplit(PyStringObject *self, PyObject *args)
1532{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001533 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1534 int err;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001535 int maxsplit = -1;
1536 const char *s = PyString_AS_STRING(self), *sub;
1537 PyObject *list, *item, *subobj = Py_None;
1538
1539 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1540 return NULL;
1541 if (maxsplit < 0)
1542 maxsplit = INT_MAX;
1543 if (subobj == Py_None)
1544 return rsplit_whitespace(s, len, maxsplit);
1545 if (PyString_Check(subobj)) {
1546 sub = PyString_AS_STRING(subobj);
1547 n = PyString_GET_SIZE(subobj);
1548 }
1549#ifdef Py_USING_UNICODE
1550 else if (PyUnicode_Check(subobj))
1551 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1552#endif
1553 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1554 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001555
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001556 if (n == 0) {
1557 PyErr_SetString(PyExc_ValueError, "empty separator");
1558 return NULL;
1559 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001560 else if (n == 1)
1561 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001562
1563 list = PyList_New(0);
1564 if (list == NULL)
1565 return NULL;
1566
1567 j = len;
1568 i = j - n;
1569 while (i >= 0) {
1570 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1571 if (maxsplit-- <= 0)
1572 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001573 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001574 if (item == NULL)
1575 goto fail;
1576 err = PyList_Insert(list, 0, item);
1577 Py_DECREF(item);
1578 if (err < 0)
1579 goto fail;
1580 j = i;
1581 i -= n;
1582 }
1583 else
1584 i--;
1585 }
1586 item = PyString_FromStringAndSize(s, j);
1587 if (item == NULL)
1588 goto fail;
1589 err = PyList_Insert(list, 0, item);
1590 Py_DECREF(item);
1591 if (err < 0)
1592 goto fail;
1593
1594 return list;
1595
1596 fail:
1597 Py_DECREF(list);
1598 return NULL;
1599}
1600
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001601
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001602PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001603"S.join(sequence) -> string\n\
1604\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001605Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001606sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001607
1608static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001609string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610{
1611 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001612 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001614 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001615 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001616 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001617 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001618 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619
Tim Peters19fe14e2001-01-19 03:03:47 +00001620 seq = PySequence_Fast(orig, "");
1621 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001622 return NULL;
1623 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001624
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001625 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001626 if (seqlen == 0) {
1627 Py_DECREF(seq);
1628 return PyString_FromString("");
1629 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001631 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001632 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1633 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001634 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001635 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001636 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001637 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638
Raymond Hettinger674f2412004-08-23 23:23:54 +00001639 /* There are at least two things to join, or else we have a subclass
1640 * of the builtin types in the sequence.
1641 * Do a pre-pass to figure out the total amount of space we'll
1642 * need (sz), see whether any argument is absurd, and defer to
1643 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001644 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001645 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001646 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001647 item = PySequence_Fast_GET_ITEM(seq, i);
1648 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001649#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001650 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001651 /* Defer to Unicode join.
1652 * CAUTION: There's no gurantee that the
1653 * original sequence can be iterated over
1654 * again, so we must pass seq here.
1655 */
1656 PyObject *result;
1657 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001658 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001659 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001660 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001661#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001662 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001663 "sequence item %i: expected string,"
1664 " %.80s found",
Martin v. Löwis18e16552006-02-15 17:27:45 +00001665 /*XXX*/(int)i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001666 Py_DECREF(seq);
1667 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001668 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001669 sz += PyString_GET_SIZE(item);
1670 if (i != 0)
1671 sz += seplen;
1672 if (sz < old_sz || sz > INT_MAX) {
1673 PyErr_SetString(PyExc_OverflowError,
1674 "join() is too long for a Python string");
1675 Py_DECREF(seq);
1676 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001677 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001678 }
1679
1680 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001681 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001682 if (res == NULL) {
1683 Py_DECREF(seq);
1684 return NULL;
1685 }
1686
1687 /* Catenate everything. */
1688 p = PyString_AS_STRING(res);
1689 for (i = 0; i < seqlen; ++i) {
1690 size_t n;
1691 item = PySequence_Fast_GET_ITEM(seq, i);
1692 n = PyString_GET_SIZE(item);
1693 memcpy(p, PyString_AS_STRING(item), n);
1694 p += n;
1695 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001696 memcpy(p, sep, seplen);
1697 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001698 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001699 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001700
Jeremy Hylton49048292000-07-11 03:28:17 +00001701 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001702 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703}
1704
Tim Peters52e155e2001-06-16 05:42:57 +00001705PyObject *
1706_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001707{
Tim Petersa7259592001-06-16 05:11:17 +00001708 assert(sep != NULL && PyString_Check(sep));
1709 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001710 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001711}
1712
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001713static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001714string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001715{
1716 if (*end > len)
1717 *end = len;
1718 else if (*end < 0)
1719 *end += len;
1720 if (*end < 0)
1721 *end = 0;
1722 if (*start < 0)
1723 *start += len;
1724 if (*start < 0)
1725 *start = 0;
1726}
1727
Martin v. Löwis18e16552006-02-15 17:27:45 +00001728static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001729string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001731 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001732 Py_ssize_t len = PyString_GET_SIZE(self);
1733 Py_ssize_t n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735
Martin v. Löwis18e16552006-02-15 17:27:45 +00001736 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001737 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001738 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001739 return -2;
1740 if (PyString_Check(subobj)) {
1741 sub = PyString_AS_STRING(subobj);
1742 n = PyString_GET_SIZE(subobj);
1743 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001744#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001745 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001746 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001747#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749 return -2;
1750
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001751 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753 if (dir > 0) {
1754 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001756 last -= n;
1757 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001758 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001759 return (long)i;
1760 }
1761 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001762 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001763
Guido van Rossum4c08d552000-03-10 22:55:18 +00001764 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001765 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001766 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001767 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001768 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001769 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001770
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771 return -1;
1772}
1773
1774
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001775PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776"S.find(sub [,start [,end]]) -> int\n\
1777\n\
1778Return the lowest index in S where substring sub is found,\n\
1779such that sub is contained within s[start,end]. Optional\n\
1780arguments start and end are interpreted as in slice notation.\n\
1781\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001782Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783
1784static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001785string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001787 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001788 if (result == -2)
1789 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001790 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791}
1792
1793
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001794PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795"S.index(sub [,start [,end]]) -> int\n\
1796\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001797Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798
1799static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001800string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001802 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 if (result == -2)
1804 return NULL;
1805 if (result == -1) {
1806 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001807 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001808 return NULL;
1809 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001810 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811}
1812
1813
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001814PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815"S.rfind(sub [,start [,end]]) -> int\n\
1816\n\
1817Return the highest index in S where substring sub is found,\n\
1818such that sub is contained within s[start,end]. Optional\n\
1819arguments start and end are interpreted as in slice notation.\n\
1820\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001821Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822
1823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001824string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001826 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 if (result == -2)
1828 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001829 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830}
1831
1832
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001833PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834"S.rindex(sub [,start [,end]]) -> int\n\
1835\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001836Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001837
1838static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001839string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001841 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842 if (result == -2)
1843 return NULL;
1844 if (result == -1) {
1845 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001846 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 return NULL;
1848 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001849 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850}
1851
1852
1853static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001854do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1855{
1856 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001857 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001858 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001859 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1860 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001861
1862 i = 0;
1863 if (striptype != RIGHTSTRIP) {
1864 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1865 i++;
1866 }
1867 }
1868
1869 j = len;
1870 if (striptype != LEFTSTRIP) {
1871 do {
1872 j--;
1873 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1874 j++;
1875 }
1876
1877 if (i == 0 && j == len && PyString_CheckExact(self)) {
1878 Py_INCREF(self);
1879 return (PyObject*)self;
1880 }
1881 else
1882 return PyString_FromStringAndSize(s+i, j-i);
1883}
1884
1885
1886static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001887do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888{
1889 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001890 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 i = 0;
1893 if (striptype != RIGHTSTRIP) {
1894 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1895 i++;
1896 }
1897 }
1898
1899 j = len;
1900 if (striptype != LEFTSTRIP) {
1901 do {
1902 j--;
1903 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1904 j++;
1905 }
1906
Tim Peters8fa5dd02001-09-12 02:18:30 +00001907 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 Py_INCREF(self);
1909 return (PyObject*)self;
1910 }
1911 else
1912 return PyString_FromStringAndSize(s+i, j-i);
1913}
1914
1915
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001916static PyObject *
1917do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1918{
1919 PyObject *sep = NULL;
1920
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001921 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001922 return NULL;
1923
1924 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001925 if (PyString_Check(sep))
1926 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001927#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001928 else if (PyUnicode_Check(sep)) {
1929 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1930 PyObject *res;
1931 if (uniself==NULL)
1932 return NULL;
1933 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1934 striptype, sep);
1935 Py_DECREF(uniself);
1936 return res;
1937 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001938#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001939 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001940 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001941#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001942 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001943#else
1944 "%s arg must be None or str",
1945#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001946 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001947 return NULL;
1948 }
1949 return do_xstrip(self, striptype, sep);
1950 }
1951
1952 return do_strip(self, striptype);
1953}
1954
1955
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001956PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001957"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958\n\
1959Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001960whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001961If chars is given and not None, remove characters in chars instead.\n\
1962If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001963
1964static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001965string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001966{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001967 if (PyTuple_GET_SIZE(args) == 0)
1968 return do_strip(self, BOTHSTRIP); /* Common case */
1969 else
1970 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001971}
1972
1973
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001974PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001975"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001976\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001977Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001978If chars is given and not None, remove characters in chars instead.\n\
1979If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980
1981static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001982string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001983{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001984 if (PyTuple_GET_SIZE(args) == 0)
1985 return do_strip(self, LEFTSTRIP); /* Common case */
1986 else
1987 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988}
1989
1990
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001991PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001992"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001994Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001995If chars is given and not None, remove characters in chars instead.\n\
1996If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997
1998static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001999string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002001 if (PyTuple_GET_SIZE(args) == 0)
2002 return do_strip(self, RIGHTSTRIP); /* Common case */
2003 else
2004 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005}
2006
2007
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002008PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009"S.lower() -> string\n\
2010\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002011Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012
2013static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002014string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015{
2016 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002017 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018 PyObject *new;
2019
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020 new = PyString_FromStringAndSize(NULL, n);
2021 if (new == NULL)
2022 return NULL;
2023 s_new = PyString_AsString(new);
2024 for (i = 0; i < n; i++) {
2025 int c = Py_CHARMASK(*s++);
2026 if (isupper(c)) {
2027 *s_new = tolower(c);
2028 } else
2029 *s_new = c;
2030 s_new++;
2031 }
2032 return new;
2033}
2034
2035
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002036PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037"S.upper() -> string\n\
2038\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002039Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002040
2041static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002042string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043{
2044 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002045 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002046 PyObject *new;
2047
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 new = PyString_FromStringAndSize(NULL, n);
2049 if (new == NULL)
2050 return NULL;
2051 s_new = PyString_AsString(new);
2052 for (i = 0; i < n; i++) {
2053 int c = Py_CHARMASK(*s++);
2054 if (islower(c)) {
2055 *s_new = toupper(c);
2056 } else
2057 *s_new = c;
2058 s_new++;
2059 }
2060 return new;
2061}
2062
2063
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002064PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065"S.title() -> string\n\
2066\n\
2067Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002068characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002069
2070static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002071string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002072{
2073 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002074 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002075 int previous_is_cased = 0;
2076 PyObject *new;
2077
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 new = PyString_FromStringAndSize(NULL, n);
2079 if (new == NULL)
2080 return NULL;
2081 s_new = PyString_AsString(new);
2082 for (i = 0; i < n; i++) {
2083 int c = Py_CHARMASK(*s++);
2084 if (islower(c)) {
2085 if (!previous_is_cased)
2086 c = toupper(c);
2087 previous_is_cased = 1;
2088 } else if (isupper(c)) {
2089 if (previous_is_cased)
2090 c = tolower(c);
2091 previous_is_cased = 1;
2092 } else
2093 previous_is_cased = 0;
2094 *s_new++ = c;
2095 }
2096 return new;
2097}
2098
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002099PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100"S.capitalize() -> string\n\
2101\n\
2102Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002103capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104
2105static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002106string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107{
2108 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002109 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 PyObject *new;
2111
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112 new = PyString_FromStringAndSize(NULL, n);
2113 if (new == NULL)
2114 return NULL;
2115 s_new = PyString_AsString(new);
2116 if (0 < n) {
2117 int c = Py_CHARMASK(*s++);
2118 if (islower(c))
2119 *s_new = toupper(c);
2120 else
2121 *s_new = c;
2122 s_new++;
2123 }
2124 for (i = 1; i < n; i++) {
2125 int c = Py_CHARMASK(*s++);
2126 if (isupper(c))
2127 *s_new = tolower(c);
2128 else
2129 *s_new = c;
2130 s_new++;
2131 }
2132 return new;
2133}
2134
2135
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002136PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137"S.count(sub[, start[, end]]) -> int\n\
2138\n\
2139Return the number of occurrences of substring sub in string\n\
2140S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002141interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142
2143static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002144string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002146 const char *s = PyString_AS_STRING(self), *sub, *t;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002147 Py_ssize_t len = PyString_GET_SIZE(self), n;
2148 Py_ssize_t i = 0, last = INT_MAX;
2149 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002150 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151
Guido van Rossumc6821402000-05-08 14:08:05 +00002152 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2153 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002155
Guido van Rossum4c08d552000-03-10 22:55:18 +00002156 if (PyString_Check(subobj)) {
2157 sub = PyString_AS_STRING(subobj);
2158 n = PyString_GET_SIZE(subobj);
2159 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002160#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002161 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002162 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002163 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2164 if (count == -1)
2165 return NULL;
2166 else
2167 return PyInt_FromLong((long) count);
2168 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002169#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002170 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2171 return NULL;
2172
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002173 string_adjust_indices(&i, &last, len);
2174
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175 m = last + 1 - n;
2176 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002177 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178
2179 r = 0;
2180 while (i < m) {
2181 if (!memcmp(s+i, sub, n)) {
2182 r++;
2183 i += n;
2184 } else {
2185 i++;
2186 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002187 if (i >= m)
2188 break;
2189 t = memchr(s+i, sub[0], m-i);
2190 if (t == NULL)
2191 break;
2192 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002194 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002195}
2196
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002197PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198"S.swapcase() -> string\n\
2199\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002201converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202
2203static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002204string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205{
2206 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002207 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208 PyObject *new;
2209
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210 new = PyString_FromStringAndSize(NULL, n);
2211 if (new == NULL)
2212 return NULL;
2213 s_new = PyString_AsString(new);
2214 for (i = 0; i < n; i++) {
2215 int c = Py_CHARMASK(*s++);
2216 if (islower(c)) {
2217 *s_new = toupper(c);
2218 }
2219 else if (isupper(c)) {
2220 *s_new = tolower(c);
2221 }
2222 else
2223 *s_new = c;
2224 s_new++;
2225 }
2226 return new;
2227}
2228
2229
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002230PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231"S.translate(table [,deletechars]) -> string\n\
2232\n\
2233Return a copy of the string S, where all characters occurring\n\
2234in the optional argument deletechars are removed, and the\n\
2235remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002236translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237
2238static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002239string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002240{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241 register char *input, *output;
2242 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002243 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002245 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002246 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247 PyObject *result;
2248 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002249 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002251 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002252 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002254
2255 if (PyString_Check(tableobj)) {
2256 table1 = PyString_AS_STRING(tableobj);
2257 tablen = PyString_GET_SIZE(tableobj);
2258 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002259#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002260 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002261 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 parameter; instead a mapping to None will cause characters
2263 to be deleted. */
2264 if (delobj != NULL) {
2265 PyErr_SetString(PyExc_TypeError,
2266 "deletions are implemented differently for unicode");
2267 return NULL;
2268 }
2269 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2270 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002271#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002273 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002274
Martin v. Löwis00b61272002-12-12 20:03:19 +00002275 if (tablen != 256) {
2276 PyErr_SetString(PyExc_ValueError,
2277 "translation table must be 256 characters long");
2278 return NULL;
2279 }
2280
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 if (delobj != NULL) {
2282 if (PyString_Check(delobj)) {
2283 del_table = PyString_AS_STRING(delobj);
2284 dellen = PyString_GET_SIZE(delobj);
2285 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002286#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 else if (PyUnicode_Check(delobj)) {
2288 PyErr_SetString(PyExc_TypeError,
2289 "deletions are implemented differently for unicode");
2290 return NULL;
2291 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002292#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2294 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 }
2296 else {
2297 del_table = NULL;
2298 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002299 }
2300
2301 table = table1;
2302 inlen = PyString_Size(input_obj);
2303 result = PyString_FromStringAndSize((char *)NULL, inlen);
2304 if (result == NULL)
2305 return NULL;
2306 output_start = output = PyString_AsString(result);
2307 input = PyString_AsString(input_obj);
2308
2309 if (dellen == 0) {
2310 /* If no deletions are required, use faster code */
2311 for (i = inlen; --i >= 0; ) {
2312 c = Py_CHARMASK(*input++);
2313 if (Py_CHARMASK((*output++ = table[c])) != c)
2314 changed = 1;
2315 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002316 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317 return result;
2318 Py_DECREF(result);
2319 Py_INCREF(input_obj);
2320 return input_obj;
2321 }
2322
2323 for (i = 0; i < 256; i++)
2324 trans_table[i] = Py_CHARMASK(table[i]);
2325
2326 for (i = 0; i < dellen; i++)
2327 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2328
2329 for (i = inlen; --i >= 0; ) {
2330 c = Py_CHARMASK(*input++);
2331 if (trans_table[c] != -1)
2332 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2333 continue;
2334 changed = 1;
2335 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002336 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 Py_DECREF(result);
2338 Py_INCREF(input_obj);
2339 return input_obj;
2340 }
2341 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002342 if (inlen > 0)
2343 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344 return result;
2345}
2346
2347
2348/* What follows is used for implementing replace(). Perry Stoll. */
2349
2350/*
2351 mymemfind
2352
2353 strstr replacement for arbitrary blocks of memory.
2354
Barry Warsaw51ac5802000-03-20 16:36:48 +00002355 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356 contents of memory pointed to by PAT. Returns the index into MEM if
2357 found, or -1 if not found. If len of PAT is greater than length of
2358 MEM, the function returns -1.
2359*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00002360static Py_ssize_t
2361mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002363 register Py_ssize_t ii;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364
2365 /* pattern can not occur in the last pat_len-1 chars */
2366 len -= pat_len;
2367
2368 for (ii = 0; ii <= len; ii++) {
Fred Drake396f6e02000-06-20 15:47:54 +00002369 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002370 return ii;
2371 }
2372 }
2373 return -1;
2374}
2375
2376/*
2377 mymemcnt
2378
2379 Return the number of distinct times PAT is found in MEM.
2380 meaning mem=1111 and pat==11 returns 2.
2381 mem=11111 and pat==11 also return 2.
2382 */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002383static Py_ssize_t
2384mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002386 register Py_ssize_t offset = 0;
2387 Py_ssize_t nfound = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388
2389 while (len >= 0) {
2390 offset = mymemfind(mem, len, pat, pat_len);
2391 if (offset == -1)
2392 break;
2393 mem += offset + pat_len;
2394 len -= offset + pat_len;
2395 nfound++;
2396 }
2397 return nfound;
2398}
2399
2400/*
2401 mymemreplace
2402
Thomas Wouters7e474022000-07-16 12:04:32 +00002403 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404 replaced with SUB.
2405
Thomas Wouters7e474022000-07-16 12:04:32 +00002406 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002407 of PAT in STR, then the original string is returned. Otherwise, a new
2408 string is allocated here and returned.
2409
2410 on return, out_len is:
2411 the length of output string, or
2412 -1 if the input string is returned, or
2413 unchanged if an error occurs (no memory).
2414
2415 return value is:
2416 the new string allocated locally, or
2417 NULL if an error occurred.
2418*/
2419static char *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002420mymemreplace(const char *str, Py_ssize_t len, /* input string */
2421 const char *pat, Py_ssize_t pat_len, /* pattern string to find */
2422 const char *sub, Py_ssize_t sub_len, /* substitution string */
2423 Py_ssize_t count, /* number of replacements */
2424 Py_ssize_t *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425{
2426 char *out_s;
2427 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002428 Py_ssize_t nfound, offset, new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002430 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431 goto return_same;
2432
2433 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002434 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002435 if (count < 0)
2436 count = INT_MAX;
2437 else if (nfound > count)
2438 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439 if (nfound == 0)
2440 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002441
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002442 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002443 if (new_len == 0) {
2444 /* Have to allocate something for the caller to free(). */
2445 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002446 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002447 return NULL;
2448 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002450 else {
2451 assert(new_len > 0);
2452 new_s = (char *)PyMem_MALLOC(new_len);
2453 if (new_s == NULL)
2454 return NULL;
2455 out_s = new_s;
2456
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002457 if (pat_len > 0) {
2458 for (; nfound > 0; --nfound) {
2459 /* find index of next instance of pattern */
2460 offset = mymemfind(str, len, pat, pat_len);
2461 if (offset == -1)
2462 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002463
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002464 /* copy non matching part of input string */
2465 memcpy(new_s, str, offset);
2466 str += offset + pat_len;
2467 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002468
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002469 /* copy substitute into the output string */
2470 new_s += offset;
2471 memcpy(new_s, sub, sub_len);
2472 new_s += sub_len;
2473 }
2474 /* copy any remaining values into output string */
2475 if (len > 0)
2476 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002477 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002478 else {
2479 for (;;++str, --len) {
2480 memcpy(new_s, sub, sub_len);
2481 new_s += sub_len;
2482 if (--nfound <= 0) {
2483 memcpy(new_s, str, len);
2484 break;
2485 }
2486 *new_s++ = *str;
2487 }
2488 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002489 }
2490 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002491 return out_s;
2492
2493 return_same:
2494 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002495 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002496}
2497
2498
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002499PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002500"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501\n\
2502Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002503old replaced by new. If the optional argument count is\n\
2504given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002505
2506static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002507string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509 const char *str = PyString_AS_STRING(self), *sub, *repl;
2510 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002511 const Py_ssize_t len = PyString_GET_SIZE(self);
2512 Py_ssize_t sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002513 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002514 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002515 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002516
Guido van Rossum4c08d552000-03-10 22:55:18 +00002517 if (!PyArg_ParseTuple(args, "OO|i:replace",
2518 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002519 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002520
2521 if (PyString_Check(subobj)) {
2522 sub = PyString_AS_STRING(subobj);
2523 sub_len = PyString_GET_SIZE(subobj);
2524 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002525#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002527 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002529#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2531 return NULL;
2532
2533 if (PyString_Check(replobj)) {
2534 repl = PyString_AS_STRING(replobj);
2535 repl_len = PyString_GET_SIZE(replobj);
2536 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002537#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002538 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002539 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002540 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002541#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2543 return NULL;
2544
Guido van Rossum4c08d552000-03-10 22:55:18 +00002545 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002546 if (new_s == NULL) {
2547 PyErr_NoMemory();
2548 return NULL;
2549 }
2550 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002551 if (PyString_CheckExact(self)) {
2552 /* we're returning another reference to self */
2553 new = (PyObject*)self;
2554 Py_INCREF(new);
2555 }
2556 else {
2557 new = PyString_FromStringAndSize(str, len);
2558 if (new == NULL)
2559 return NULL;
2560 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002561 }
2562 else {
2563 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002564 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002565 }
2566 return new;
2567}
2568
2569
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002570PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002571"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002572\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002573Return True if S starts with the specified prefix, False otherwise.\n\
2574With optional start, test S beginning at that position.\n\
2575With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002576
2577static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002578string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002579{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002581 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002582 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002583 Py_ssize_t plen;
2584 Py_ssize_t start = 0;
2585 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002587
Guido van Rossumc6821402000-05-08 14:08:05 +00002588 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2589 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 return NULL;
2591 if (PyString_Check(subobj)) {
2592 prefix = PyString_AS_STRING(subobj);
2593 plen = PyString_GET_SIZE(subobj);
2594 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002595#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002596 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002597 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002598 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002599 subobj, start, end, -1);
2600 if (rc == -1)
2601 return NULL;
2602 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002603 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002604 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002605#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002607 return NULL;
2608
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002609 string_adjust_indices(&start, &end, len);
2610
2611 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002612 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002613
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002614 if (end-start >= plen)
2615 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2616 else
2617 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002618}
2619
2620
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002621PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002622"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002623\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002624Return True if S ends with the specified suffix, False otherwise.\n\
2625With optional start, test S beginning at that position.\n\
2626With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002627
2628static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002629string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002630{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002631 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002632 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002634 Py_ssize_t slen;
2635 Py_ssize_t start = 0;
2636 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002637 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002638
Guido van Rossumc6821402000-05-08 14:08:05 +00002639 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2640 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002641 return NULL;
2642 if (PyString_Check(subobj)) {
2643 suffix = PyString_AS_STRING(subobj);
2644 slen = PyString_GET_SIZE(subobj);
2645 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002646#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002647 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002648 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002649 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002650 subobj, start, end, +1);
2651 if (rc == -1)
2652 return NULL;
2653 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002654 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002655 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002656#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002657 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002658 return NULL;
2659
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002660 string_adjust_indices(&start, &end, len);
2661
2662 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002663 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002664
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002665 if (end-slen > start)
2666 start = end - slen;
2667 if (end-start >= slen)
2668 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2669 else
2670 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002671}
2672
2673
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002674PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002675"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002676\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002677Encodes S using the codec registered for encoding. encoding defaults\n\
2678to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002679handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002680a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2681'xmlcharrefreplace' as well as any other name registered with\n\
2682codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002683
2684static PyObject *
2685string_encode(PyStringObject *self, PyObject *args)
2686{
2687 char *encoding = NULL;
2688 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002689 PyObject *v;
2690
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002691 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2692 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002693 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002694 if (v == NULL)
2695 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002696 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2697 PyErr_Format(PyExc_TypeError,
2698 "encoder did not return a string/unicode object "
2699 "(type=%.400s)",
2700 v->ob_type->tp_name);
2701 Py_DECREF(v);
2702 return NULL;
2703 }
2704 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002705
2706 onError:
2707 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002708}
2709
2710
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002711PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002712"S.decode([encoding[,errors]]) -> object\n\
2713\n\
2714Decodes S using the codec registered for encoding. encoding defaults\n\
2715to the default encoding. errors may be given to set a different error\n\
2716handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002717a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2718as well as any other name registerd with codecs.register_error that is\n\
2719able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002720
2721static PyObject *
2722string_decode(PyStringObject *self, PyObject *args)
2723{
2724 char *encoding = NULL;
2725 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002726 PyObject *v;
2727
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002728 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2729 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002730 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002731 if (v == NULL)
2732 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002733 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2734 PyErr_Format(PyExc_TypeError,
2735 "decoder did not return a string/unicode object "
2736 "(type=%.400s)",
2737 v->ob_type->tp_name);
2738 Py_DECREF(v);
2739 return NULL;
2740 }
2741 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002742
2743 onError:
2744 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002745}
2746
2747
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002748PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002749"S.expandtabs([tabsize]) -> string\n\
2750\n\
2751Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002752If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002753
2754static PyObject*
2755string_expandtabs(PyStringObject *self, PyObject *args)
2756{
2757 const char *e, *p;
2758 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002759 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002760 PyObject *u;
2761 int tabsize = 8;
2762
2763 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2764 return NULL;
2765
Thomas Wouters7e474022000-07-16 12:04:32 +00002766 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002767 i = j = 0;
2768 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2769 for (p = PyString_AS_STRING(self); p < e; p++)
2770 if (*p == '\t') {
2771 if (tabsize > 0)
2772 j += tabsize - (j % tabsize);
2773 }
2774 else {
2775 j++;
2776 if (*p == '\n' || *p == '\r') {
2777 i += j;
2778 j = 0;
2779 }
2780 }
2781
2782 /* Second pass: create output string and fill it */
2783 u = PyString_FromStringAndSize(NULL, i + j);
2784 if (!u)
2785 return NULL;
2786
2787 j = 0;
2788 q = PyString_AS_STRING(u);
2789
2790 for (p = PyString_AS_STRING(self); p < e; p++)
2791 if (*p == '\t') {
2792 if (tabsize > 0) {
2793 i = tabsize - (j % tabsize);
2794 j += i;
2795 while (i--)
2796 *q++ = ' ';
2797 }
2798 }
2799 else {
2800 j++;
2801 *q++ = *p;
2802 if (*p == '\n' || *p == '\r')
2803 j = 0;
2804 }
2805
2806 return u;
2807}
2808
Tim Peters8fa5dd02001-09-12 02:18:30 +00002809static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002810pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002811{
2812 PyObject *u;
2813
2814 if (left < 0)
2815 left = 0;
2816 if (right < 0)
2817 right = 0;
2818
Tim Peters8fa5dd02001-09-12 02:18:30 +00002819 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002820 Py_INCREF(self);
2821 return (PyObject *)self;
2822 }
2823
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002824 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002825 left + PyString_GET_SIZE(self) + right);
2826 if (u) {
2827 if (left)
2828 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002829 memcpy(PyString_AS_STRING(u) + left,
2830 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002831 PyString_GET_SIZE(self));
2832 if (right)
2833 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2834 fill, right);
2835 }
2836
2837 return u;
2838}
2839
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002840PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002841"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002842"\n"
2843"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002844"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002845
2846static PyObject *
2847string_ljust(PyStringObject *self, PyObject *args)
2848{
2849 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002850 char fillchar = ' ';
2851
2852 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002853 return NULL;
2854
Tim Peters8fa5dd02001-09-12 02:18:30 +00002855 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002856 Py_INCREF(self);
2857 return (PyObject*) self;
2858 }
2859
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002860 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861}
2862
2863
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002864PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002865"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002866"\n"
2867"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002868"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869
2870static PyObject *
2871string_rjust(PyStringObject *self, PyObject *args)
2872{
2873 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002874 char fillchar = ' ';
2875
2876 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877 return NULL;
2878
Tim Peters8fa5dd02001-09-12 02:18:30 +00002879 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002880 Py_INCREF(self);
2881 return (PyObject*) self;
2882 }
2883
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002884 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885}
2886
2887
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002888PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002889"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002890"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002891"Return S centered in a string of length width. Padding is\n"
2892"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893
2894static PyObject *
2895string_center(PyStringObject *self, PyObject *args)
2896{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002897 Py_ssize_t marg, left;
2898 long width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002899 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900
Martin v. Löwis18e16552006-02-15 17:27:45 +00002901 if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002902 return NULL;
2903
Tim Peters8fa5dd02001-09-12 02:18:30 +00002904 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002905 Py_INCREF(self);
2906 return (PyObject*) self;
2907 }
2908
2909 marg = width - PyString_GET_SIZE(self);
2910 left = marg / 2 + (marg & width & 1);
2911
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002912 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002913}
2914
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002915PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002916"S.zfill(width) -> string\n"
2917"\n"
2918"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002919"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002920
2921static PyObject *
2922string_zfill(PyStringObject *self, PyObject *args)
2923{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002924 long fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00002925 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002926 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002927
2928 int width;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002929 if (!PyArg_ParseTuple(args, "l:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00002930 return NULL;
2931
2932 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002933 if (PyString_CheckExact(self)) {
2934 Py_INCREF(self);
2935 return (PyObject*) self;
2936 }
2937 else
2938 return PyString_FromStringAndSize(
2939 PyString_AS_STRING(self),
2940 PyString_GET_SIZE(self)
2941 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002942 }
2943
2944 fill = width - PyString_GET_SIZE(self);
2945
2946 s = pad(self, fill, 0, '0');
2947
2948 if (s == NULL)
2949 return NULL;
2950
2951 p = PyString_AS_STRING(s);
2952 if (p[fill] == '+' || p[fill] == '-') {
2953 /* move sign to beginning of string */
2954 p[0] = p[fill];
2955 p[fill] = '0';
2956 }
2957
2958 return (PyObject*) s;
2959}
2960
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002961PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002962"S.isspace() -> bool\n\
2963\n\
2964Return True if all characters in S are whitespace\n\
2965and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002966
2967static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002968string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002969{
Fred Drakeba096332000-07-09 07:04:36 +00002970 register const unsigned char *p
2971 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002972 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002973
Guido van Rossum4c08d552000-03-10 22:55:18 +00002974 /* Shortcut for single character strings */
2975 if (PyString_GET_SIZE(self) == 1 &&
2976 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002977 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002978
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002979 /* Special case for empty strings */
2980 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002981 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002982
Guido van Rossum4c08d552000-03-10 22:55:18 +00002983 e = p + PyString_GET_SIZE(self);
2984 for (; p < e; p++) {
2985 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002986 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002988 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002989}
2990
2991
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002992PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002993"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002994\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00002995Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002996and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002997
2998static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002999string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003000{
Fred Drakeba096332000-07-09 07:04:36 +00003001 register const unsigned char *p
3002 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003003 register const unsigned char *e;
3004
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003005 /* Shortcut for single character strings */
3006 if (PyString_GET_SIZE(self) == 1 &&
3007 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003008 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003009
3010 /* Special case for empty strings */
3011 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003012 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003013
3014 e = p + PyString_GET_SIZE(self);
3015 for (; p < e; p++) {
3016 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003017 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003018 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003019 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003020}
3021
3022
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003023PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003024"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003025\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003026Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003027and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003028
3029static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003030string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003031{
Fred Drakeba096332000-07-09 07:04:36 +00003032 register const unsigned char *p
3033 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003034 register const unsigned char *e;
3035
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003036 /* Shortcut for single character strings */
3037 if (PyString_GET_SIZE(self) == 1 &&
3038 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003039 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003040
3041 /* Special case for empty strings */
3042 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003043 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003044
3045 e = p + PyString_GET_SIZE(self);
3046 for (; p < e; p++) {
3047 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003048 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003049 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003050 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003051}
3052
3053
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003054PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003055"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003056\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003057Return True if all characters in S are digits\n\
3058and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059
3060static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003061string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003062{
Fred Drakeba096332000-07-09 07:04:36 +00003063 register const unsigned char *p
3064 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003065 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003066
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067 /* Shortcut for single character strings */
3068 if (PyString_GET_SIZE(self) == 1 &&
3069 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003070 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003072 /* Special case for empty strings */
3073 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003074 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003075
Guido van Rossum4c08d552000-03-10 22:55:18 +00003076 e = p + PyString_GET_SIZE(self);
3077 for (; p < e; p++) {
3078 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003079 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003081 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082}
3083
3084
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003085PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003086"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003087\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003088Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003089at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090
3091static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003092string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003093{
Fred Drakeba096332000-07-09 07:04:36 +00003094 register const unsigned char *p
3095 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003096 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003097 int cased;
3098
Guido van Rossum4c08d552000-03-10 22:55:18 +00003099 /* Shortcut for single character strings */
3100 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003101 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003102
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003103 /* Special case for empty strings */
3104 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003105 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003106
Guido van Rossum4c08d552000-03-10 22:55:18 +00003107 e = p + PyString_GET_SIZE(self);
3108 cased = 0;
3109 for (; p < e; p++) {
3110 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003111 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003112 else if (!cased && islower(*p))
3113 cased = 1;
3114 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003115 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003116}
3117
3118
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003119PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003120"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003121\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003122Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003123at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003124
3125static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003126string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003127{
Fred Drakeba096332000-07-09 07:04:36 +00003128 register const unsigned char *p
3129 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003130 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003131 int cased;
3132
Guido van Rossum4c08d552000-03-10 22:55:18 +00003133 /* Shortcut for single character strings */
3134 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003135 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003136
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003137 /* Special case for empty strings */
3138 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003139 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003140
Guido van Rossum4c08d552000-03-10 22:55:18 +00003141 e = p + PyString_GET_SIZE(self);
3142 cased = 0;
3143 for (; p < e; p++) {
3144 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003145 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003146 else if (!cased && isupper(*p))
3147 cased = 1;
3148 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003149 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003150}
3151
3152
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003153PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003154"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003156Return True if S is a titlecased string and there is at least one\n\
3157character in S, i.e. uppercase characters may only follow uncased\n\
3158characters and lowercase characters only cased ones. Return False\n\
3159otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160
3161static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003162string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003163{
Fred Drakeba096332000-07-09 07:04:36 +00003164 register const unsigned char *p
3165 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003166 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167 int cased, previous_is_cased;
3168
Guido van Rossum4c08d552000-03-10 22:55:18 +00003169 /* Shortcut for single character strings */
3170 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003171 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003172
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003173 /* Special case for empty strings */
3174 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003175 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003176
Guido van Rossum4c08d552000-03-10 22:55:18 +00003177 e = p + PyString_GET_SIZE(self);
3178 cased = 0;
3179 previous_is_cased = 0;
3180 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003181 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182
3183 if (isupper(ch)) {
3184 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003185 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003186 previous_is_cased = 1;
3187 cased = 1;
3188 }
3189 else if (islower(ch)) {
3190 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003191 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003192 previous_is_cased = 1;
3193 cased = 1;
3194 }
3195 else
3196 previous_is_cased = 0;
3197 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003198 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003199}
3200
3201
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003202PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003203"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003204\n\
3205Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003206Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003207is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208
Guido van Rossum4c08d552000-03-10 22:55:18 +00003209static PyObject*
3210string_splitlines(PyStringObject *self, PyObject *args)
3211{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003212 register Py_ssize_t i;
3213 register Py_ssize_t j;
3214 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003215 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 PyObject *list;
3217 PyObject *str;
3218 char *data;
3219
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003220 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003221 return NULL;
3222
3223 data = PyString_AS_STRING(self);
3224 len = PyString_GET_SIZE(self);
3225
Guido van Rossum4c08d552000-03-10 22:55:18 +00003226 list = PyList_New(0);
3227 if (!list)
3228 goto onError;
3229
3230 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003231 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003232
Guido van Rossum4c08d552000-03-10 22:55:18 +00003233 /* Find a line and append it */
3234 while (i < len && data[i] != '\n' && data[i] != '\r')
3235 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003236
3237 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003238 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003239 if (i < len) {
3240 if (data[i] == '\r' && i + 1 < len &&
3241 data[i+1] == '\n')
3242 i += 2;
3243 else
3244 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003245 if (keepends)
3246 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003247 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003248 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003249 j = i;
3250 }
3251 if (j < len) {
3252 SPLIT_APPEND(data, j, len);
3253 }
3254
3255 return list;
3256
3257 onError:
3258 Py_DECREF(list);
3259 return NULL;
3260}
3261
3262#undef SPLIT_APPEND
3263
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003264static PyObject *
3265string_getnewargs(PyStringObject *v)
3266{
3267 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3268}
3269
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003270
/* Method table for the str type (installed as tp_methods of
   PyString_Type).  Each entry gives the Python-visible method name, the
   C function implementing it, the calling-convention flag
   (METH_NOARGS, METH_O or METH_VARARGS) and, where present, the
   docstring.  The all-NULL entry terminates the table. */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003271static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003272string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003273 /* Counterparts of the obsolete stropmodule functions; except
3274 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003275 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3276 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003277 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003278 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3279 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003280 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3281 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3282 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3283 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3284 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3285 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3286 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003287 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3288 capitalize__doc__},
3289 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3290 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3291 endswith__doc__},
3292 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3293 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3294 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3295 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3296 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3297 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3298 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3299 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3300 startswith__doc__},
3301 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3302 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3303 swapcase__doc__},
3304 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3305 translate__doc__},
3306 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3307 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3308 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3309 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3310 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3311 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3312 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3313 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3314 expandtabs__doc__},
3315 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3316 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003317 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003318 {NULL, NULL} /* sentinel */
3319};
3320
Jeremy Hylton938ace62002-07-17 16:30:39 +00003321static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003322str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3323
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003324static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003325string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003326{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003327 PyObject *x = NULL;
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00003328 static const char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003329
Guido van Rossumae960af2001-08-30 03:11:59 +00003330 if (type != &PyString_Type)
3331 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003332 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3333 return NULL;
3334 if (x == NULL)
3335 return PyString_FromString("");
3336 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003337}
3338
Guido van Rossumae960af2001-08-30 03:11:59 +00003339static PyObject *
3340str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3341{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003342 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003343 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003344
3345 assert(PyType_IsSubtype(type, &PyString_Type));
3346 tmp = string_new(&PyString_Type, args, kwds);
3347 if (tmp == NULL)
3348 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003349 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003350 n = PyString_GET_SIZE(tmp);
3351 pnew = type->tp_alloc(type, n);
3352 if (pnew != NULL) {
3353 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003354 ((PyStringObject *)pnew)->ob_shash =
3355 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003356 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003357 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003358 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003359 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003360}
3361
Guido van Rossumcacfc072002-05-24 19:01:59 +00003362static PyObject *
3363basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3364{
3365 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003366 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003367 return NULL;
3368}
3369
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003370static PyObject *
3371string_mod(PyObject *v, PyObject *w)
3372{
3373 if (!PyString_Check(v)) {
3374 Py_INCREF(Py_NotImplemented);
3375 return Py_NotImplemented;
3376 }
3377 return PyString_Format(v, w);
3378}
3379
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003380PyDoc_STRVAR(basestring_doc,
3381"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003382
/* Number-protocol slots for str (installed as tp_as_number of
   PyString_Type).  Only nb_remainder is filled in, with string_mod, so
   that the % operator performs formatting; the slots listed before it
   are explicitly 0 and any slots after it are left to default
   zero-initialization. */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003383static PyNumberMethods string_as_number = {
3384 0, /*nb_add*/
3385 0, /*nb_subtract*/
3386 0, /*nb_multiply*/
3387 0, /*nb_divide*/
3388 string_mod, /*nb_remainder*/
3389};
3390
3391
/* Type object for "basestring".  It derives from PyBaseObject_Type, may
   be subclassed (Py_TPFLAGS_BASETYPE), and is used as tp_base of
   PyString_Type.  Its only behavioural slot is tp_new = basestring_new,
   which always raises TypeError, so the type itself cannot be
   instantiated. */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003392PyTypeObject PyBaseString_Type = {
3393 PyObject_HEAD_INIT(&PyType_Type)
3394 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003395 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003396 0,
3397 0,
3398 0, /* tp_dealloc */
3399 0, /* tp_print */
3400 0, /* tp_getattr */
3401 0, /* tp_setattr */
3402 0, /* tp_compare */
3403 0, /* tp_repr */
3404 0, /* tp_as_number */
3405 0, /* tp_as_sequence */
3406 0, /* tp_as_mapping */
3407 0, /* tp_hash */
3408 0, /* tp_call */
3409 0, /* tp_str */
3410 0, /* tp_getattro */
3411 0, /* tp_setattro */
3412 0, /* tp_as_buffer */
3413 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3414 basestring_doc, /* tp_doc */
3415 0, /* tp_traverse */
3416 0, /* tp_clear */
3417 0, /* tp_richcompare */
3418 0, /* tp_weaklistoffset */
3419 0, /* tp_iter */
3420 0, /* tp_iternext */
3421 0, /* tp_methods */
3422 0, /* tp_members */
3423 0, /* tp_getset */
3424 &PyBaseObject_Type, /* tp_base */
3425 0, /* tp_dict */
3426 0, /* tp_descr_get */
3427 0, /* tp_descr_set */
3428 0, /* tp_dictoffset */
3429 0, /* tp_init */
3430 0, /* tp_alloc */
3431 basestring_new, /* tp_new */
3432 0, /* tp_free */
3433};
3434
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003435PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003436"str(object) -> string\n\
3437\n\
3438Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003439If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003440
/* Type object for "str".  Instances are variable-sized: the basic size
   is sizeof(PyStringObject) and each item is one char.  The type derives
   from PyBaseString_Type, may itself be subclassed
   (Py_TPFLAGS_BASETYPE), takes its methods from string_methods, creates
   instances through string_new and releases them with PyObject_Del. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003441PyTypeObject PyString_Type = {
3442 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003443 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003444 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003445 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003446 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003447 (destructor)string_dealloc, /* tp_dealloc */
3448 (printfunc)string_print, /* tp_print */
3449 0, /* tp_getattr */
3450 0, /* tp_setattr */
3451 0, /* tp_compare */
3452 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003453 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003454 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003455 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003456 (hashfunc)string_hash, /* tp_hash */
3457 0, /* tp_call */
3458 (reprfunc)string_str, /* tp_str */
3459 PyObject_GenericGetAttr, /* tp_getattro */
3460 0, /* tp_setattro */
3461 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003462 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3463 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003464 string_doc, /* tp_doc */
3465 0, /* tp_traverse */
3466 0, /* tp_clear */
3467 (richcmpfunc)string_richcompare, /* tp_richcompare */
3468 0, /* tp_weaklistoffset */
3469 0, /* tp_iter */
3470 0, /* tp_iternext */
3471 string_methods, /* tp_methods */
3472 0, /* tp_members */
3473 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003474 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003475 0, /* tp_dict */
3476 0, /* tp_descr_get */
3477 0, /* tp_descr_set */
3478 0, /* tp_dictoffset */
3479 0, /* tp_init */
3480 0, /* tp_alloc */
3481 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003482 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003483};
3484
3485void
Fred Drakeba096332000-07-09 07:04:36 +00003486PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003487{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003488 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003489 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003490 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003491 if (w == NULL || !PyString_Check(*pv)) {
3492 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003493 *pv = NULL;
3494 return;
3495 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003496 v = string_concat((PyStringObject *) *pv, w);
3497 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003498 *pv = v;
3499}
3500
Guido van Rossum013142a1994-08-30 08:19:36 +00003501void
Fred Drakeba096332000-07-09 07:04:36 +00003502PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003503{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003504 PyString_Concat(pv, w);
3505 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003506}
3507
3508
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003509/* The following function breaks the notion that strings are immutable:
3510 it changes the size of a string. We get away with this only if there
3511 is only one module referencing the object. You can also think of it
3512 as creating a new string object and destroying the old one, only
3513 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003514 already be known to some other part of the code...
3515 Note that if there's not enough memory to resize the string, the original
3516 string object at *pv is deallocated, *pv is set to NULL, an "out of
3517 memory" exception is set, and -1 is returned. Else (on success) 0 is
3518 returned, and the value in *pv may or may not be the same as on input.
3519 As always, an extra byte is allocated for a trailing \0 byte (newsize
3520 does *not* include that), and a trailing \0 byte is stored.
3521*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003522
/* Resize the string object at *pv in place so that it holds `newsize'
   bytes (plus the trailing \0).

   pv       address of the caller's only reference to the string; updated
            to the (possibly moved) object on success, set to NULL on
            failure.
   newsize  new length in bytes; must be >= 0.

   Returns 0 on success, -1 with an exception set on failure. */
3523int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003524_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003525{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003526 register PyObject *v;
3527 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003528 v = *pv;
/* Only an uninterned string with exactly one reference and a
   non-negative target size may be resized; anything else drops the
   caller's reference and reports a bad internal call. */
Armin Rigo618fbf52004-08-07 20:58:32 +00003529 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3530 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003531 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003532 Py_DECREF(v);
3533 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003534 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003535 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003536 /* XXX UNREF/NEWREF interface should be more symmetrical */
/* The old object is forgotten before the realloc and the result is
   registered again with _Py_NewReference() afterwards, since the block
   may move. */
Tim Peters34592512002-07-11 06:23:50 +00003537 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003538 _Py_ForgetReference(v);
3539 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003540 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003541 if (*pv == NULL) {
/* Reallocation failed: free the original block (still referenced by v)
   and leave *pv NULL. */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003542 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003543 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003544 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003545 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003546 _Py_NewReference(*pv);
3547 sv = (PyStringObject *) *pv;
/* Record the new length and store the terminating \0 at the new end. */
Guido van Rossum921842f1990-11-18 17:30:23 +00003548 sv->ob_size = newsize;
3549 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003550 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003551 return 0;
3552}
Guido van Rossume5372401993-03-16 12:15:04 +00003553
3554/* Helpers for formatstring */
3555
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003556static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003557getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003558{
3559 int argidx = *p_argidx;
3560 if (argidx < arglen) {
3561 (*p_argidx)++;
3562 if (arglen < 0)
3563 return args;
3564 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003565 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003566 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003567 PyErr_SetString(PyExc_TypeError,
3568 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003569 return NULL;
3570}
3571
Tim Peters38fd5b62000-09-21 05:43:11 +00003572/* Format codes
3573 * F_LJUST '-'
3574 * F_SIGN '+'
3575 * F_BLANK ' '
3576 * F_ALT '#'
3577 * F_ZERO '0'
3578 */
Guido van Rossume5372401993-03-16 12:15:04 +00003579#define F_LJUST (1<<0)
3580#define F_SIGN (1<<1)
3581#define F_BLANK (1<<2)
3582#define F_ALT (1<<3)
3583#define F_ZERO (1<<4)
3584
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003585static int
Fred Drakeba096332000-07-09 07:04:36 +00003586formatfloat(char *buf, size_t buflen, int flags,
3587 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003588{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003589 /* fmt = '%#.' + `prec` + `type`
3590 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003591 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003592 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003593 x = PyFloat_AsDouble(v);
3594 if (x == -1.0 && PyErr_Occurred()) {
3595 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003596 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003597 }
Guido van Rossume5372401993-03-16 12:15:04 +00003598 if (prec < 0)
3599 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003600 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3601 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003602 /* Worst case length calc to ensure no buffer overrun:
3603
3604 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003605 fmt = %#.<prec>g
3606 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003607 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003608 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003609
3610 'f' formats:
3611 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3612 len = 1 + 50 + 1 + prec = 52 + prec
3613
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003614 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003615 always given), therefore increase the length by one.
3616
3617 */
3618 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3619 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003620 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003621 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003622 return -1;
3623 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003624 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3625 (flags&F_ALT) ? "#" : "",
3626 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003627 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003628 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003629}
3630
Tim Peters38fd5b62000-09-21 05:43:11 +00003631/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3632 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3633 * Python's regular ints.
3634 * Return value: a new PyString*, or NULL if error.
3635 * . *pbuf is set to point into it,
3636 * *plen set to the # of chars following that.
3637 * Caller must decref it when done using pbuf.
3638 * The string starting at *pbuf is of the form
3639 * "-"? ("0x" | "0X")? digit+
3640 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003641 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003642 * There will be at least prec digits, zero-filled on the left if
3643 * necessary to get that many.
3644 * val object to be converted
3645 * flags bitmask of format flags; only F_ALT is looked at
3646 * prec minimum number of digits; 0-fill on left if needed
3647 * type a character in [duoxX]; u acts the same as d
3648 *
3649 * CAUTION: o, x and X conversions on regular ints can never
3650 * produce a '-' sign, but can for Python's unbounded ints.
3651 */
3652PyObject*
3653_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3654 char **pbuf, int *plen)
3655{
3656 PyObject *result = NULL;
3657 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003658 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003659 int sign; /* 1 if '-', else 0 */
3660 int len; /* number of characters */
3661 int numdigits; /* len == numnondigits + numdigits */
3662 int numnondigits = 0;
3663
3664 switch (type) {
3665 case 'd':
3666 case 'u':
3667 result = val->ob_type->tp_str(val);
3668 break;
3669 case 'o':
3670 result = val->ob_type->tp_as_number->nb_oct(val);
3671 break;
3672 case 'x':
3673 case 'X':
3674 numnondigits = 2;
3675 result = val->ob_type->tp_as_number->nb_hex(val);
3676 break;
3677 default:
3678 assert(!"'type' not in [duoxX]");
3679 }
3680 if (!result)
3681 return NULL;
3682
3683 /* To modify the string in-place, there can only be one reference. */
3684 if (result->ob_refcnt != 1) {
3685 PyErr_BadInternalCall();
3686 return NULL;
3687 }
3688 buf = PyString_AsString(result);
3689 len = PyString_Size(result);
3690 if (buf[len-1] == 'L') {
3691 --len;
3692 buf[len] = '\0';
3693 }
3694 sign = buf[0] == '-';
3695 numnondigits += sign;
3696 numdigits = len - numnondigits;
3697 assert(numdigits > 0);
3698
Tim Petersfff53252001-04-12 18:38:48 +00003699 /* Get rid of base marker unless F_ALT */
3700 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003701 /* Need to skip 0x, 0X or 0. */
3702 int skipped = 0;
3703 switch (type) {
3704 case 'o':
3705 assert(buf[sign] == '0');
3706 /* If 0 is only digit, leave it alone. */
3707 if (numdigits > 1) {
3708 skipped = 1;
3709 --numdigits;
3710 }
3711 break;
3712 case 'x':
3713 case 'X':
3714 assert(buf[sign] == '0');
3715 assert(buf[sign + 1] == 'x');
3716 skipped = 2;
3717 numnondigits -= 2;
3718 break;
3719 }
3720 if (skipped) {
3721 buf += skipped;
3722 len -= skipped;
3723 if (sign)
3724 buf[0] = '-';
3725 }
3726 assert(len == numnondigits + numdigits);
3727 assert(numdigits > 0);
3728 }
3729
3730 /* Fill with leading zeroes to meet minimum width. */
3731 if (prec > numdigits) {
3732 PyObject *r1 = PyString_FromStringAndSize(NULL,
3733 numnondigits + prec);
3734 char *b1;
3735 if (!r1) {
3736 Py_DECREF(result);
3737 return NULL;
3738 }
3739 b1 = PyString_AS_STRING(r1);
3740 for (i = 0; i < numnondigits; ++i)
3741 *b1++ = *buf++;
3742 for (i = 0; i < prec - numdigits; i++)
3743 *b1++ = '0';
3744 for (i = 0; i < numdigits; i++)
3745 *b1++ = *buf++;
3746 *b1 = '\0';
3747 Py_DECREF(result);
3748 result = r1;
3749 buf = PyString_AS_STRING(result);
3750 len = numnondigits + prec;
3751 }
3752
3753 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003754 if (type == 'X') {
3755 /* Need to convert all lower case letters to upper case.
3756 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003757 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003758 if (buf[i] >= 'a' && buf[i] <= 'x')
3759 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003760 }
3761 *pbuf = buf;
3762 *plen = len;
3763 return result;
3764}
3765
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003766static int
Fred Drakeba096332000-07-09 07:04:36 +00003767formatint(char *buf, size_t buflen, int flags,
3768 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003769{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003770 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003771 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3772 + 1 + 1 = 24 */
3773 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003774 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003775 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003776
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003777 x = PyInt_AsLong(v);
3778 if (x == -1 && PyErr_Occurred()) {
3779 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003780 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003781 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003782 if (x < 0 && type == 'u') {
3783 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003784 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003785 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3786 sign = "-";
3787 else
3788 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003789 if (prec < 0)
3790 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003791
3792 if ((flags & F_ALT) &&
3793 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003794 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003795 * of issues that cause pain:
3796 * - when 0 is being converted, the C standard leaves off
3797 * the '0x' or '0X', which is inconsistent with other
3798 * %#x/%#X conversions and inconsistent with Python's
3799 * hex() function
3800 * - there are platforms that violate the standard and
3801 * convert 0 with the '0x' or '0X'
3802 * (Metrowerks, Compaq Tru64)
3803 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003804 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003805 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003806 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003807 * We can achieve the desired consistency by inserting our
3808 * own '0x' or '0X' prefix, and substituting %x/%X in place
3809 * of %#x/%#X.
3810 *
3811 * Note that this is the same approach as used in
3812 * formatint() in unicodeobject.c
3813 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003814 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3815 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003816 }
3817 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003818 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3819 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003820 prec, type);
3821 }
3822
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003823 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3824 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003825 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003826 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003827 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003828 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003829 return -1;
3830 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003831 if (sign[0])
3832 PyOS_snprintf(buf, buflen, fmt, -x);
3833 else
3834 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003835 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003836}
3837
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003838static int
Fred Drakeba096332000-07-09 07:04:36 +00003839formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003840{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003841 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003842 if (PyString_Check(v)) {
3843 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003844 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003845 }
3846 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003847 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003848 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003849 }
3850 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003851 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003852}
3853
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003854/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3855
3856 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3857 chars are formatted. XXX This is a magic number. Each formatting
3858 routine does bounds checking to ensure no overflow, but a better
3859 solution may be to malloc a buffer of appropriate size for each
3860 format. For now, the current solution is sufficient.
3861*/
3862#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003863
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003864PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003865PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003866{
3867 char *fmt, *res;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003868 int arglen, argidx;
3869 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003870 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003871 PyObject *result, *orig_args;
3872#ifdef Py_USING_UNICODE
3873 PyObject *v, *w;
3874#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003875 PyObject *dict = NULL;
3876 if (format == NULL || !PyString_Check(format) || args == NULL) {
3877 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003878 return NULL;
3879 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003880 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003881 fmt = PyString_AS_STRING(format);
3882 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003883 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003884 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003885 if (result == NULL)
3886 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003887 res = PyString_AsString(result);
3888 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003889 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003890 argidx = 0;
3891 }
3892 else {
3893 arglen = -1;
3894 argidx = -2;
3895 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003896 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3897 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003898 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003899 while (--fmtcnt >= 0) {
3900 if (*fmt != '%') {
3901 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003902 rescnt = fmtcnt + 100;
3903 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003904 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003905 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003906 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003907 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003908 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003909 }
3910 *res++ = *fmt++;
3911 }
3912 else {
3913 /* Got a format specifier */
3914 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003915 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003916 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003917 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003918 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003919 PyObject *v = NULL;
3920 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003921 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003922 int sign;
3923 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003924 char formatbuf[FORMATBUFLEN];
3925 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003926#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003927 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003928 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003929#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003930
Guido van Rossumda9c2711996-12-05 21:58:58 +00003931 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003932 if (*fmt == '(') {
3933 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003934 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003935 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003936 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003937
3938 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003939 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003940 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003941 goto error;
3942 }
3943 ++fmt;
3944 --fmtcnt;
3945 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003946 /* Skip over balanced parentheses */
3947 while (pcount > 0 && --fmtcnt >= 0) {
3948 if (*fmt == ')')
3949 --pcount;
3950 else if (*fmt == '(')
3951 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003952 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003953 }
3954 keylen = fmt - keystart - 1;
3955 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003956 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003957 "incomplete format key");
3958 goto error;
3959 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003960 key = PyString_FromStringAndSize(keystart,
3961 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003962 if (key == NULL)
3963 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003964 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003965 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003966 args_owned = 0;
3967 }
3968 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003969 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003970 if (args == NULL) {
3971 goto error;
3972 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003973 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003974 arglen = -1;
3975 argidx = -2;
3976 }
Guido van Rossume5372401993-03-16 12:15:04 +00003977 while (--fmtcnt >= 0) {
3978 switch (c = *fmt++) {
3979 case '-': flags |= F_LJUST; continue;
3980 case '+': flags |= F_SIGN; continue;
3981 case ' ': flags |= F_BLANK; continue;
3982 case '#': flags |= F_ALT; continue;
3983 case '0': flags |= F_ZERO; continue;
3984 }
3985 break;
3986 }
3987 if (c == '*') {
3988 v = getnextarg(args, arglen, &argidx);
3989 if (v == NULL)
3990 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003991 if (!PyInt_Check(v)) {
3992 PyErr_SetString(PyExc_TypeError,
3993 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003994 goto error;
3995 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003996 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003997 if (width < 0) {
3998 flags |= F_LJUST;
3999 width = -width;
4000 }
Guido van Rossume5372401993-03-16 12:15:04 +00004001 if (--fmtcnt >= 0)
4002 c = *fmt++;
4003 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004004 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004005 width = c - '0';
4006 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004007 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004008 if (!isdigit(c))
4009 break;
4010 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004011 PyErr_SetString(
4012 PyExc_ValueError,
4013 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004014 goto error;
4015 }
4016 width = width*10 + (c - '0');
4017 }
4018 }
4019 if (c == '.') {
4020 prec = 0;
4021 if (--fmtcnt >= 0)
4022 c = *fmt++;
4023 if (c == '*') {
4024 v = getnextarg(args, arglen, &argidx);
4025 if (v == NULL)
4026 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004027 if (!PyInt_Check(v)) {
4028 PyErr_SetString(
4029 PyExc_TypeError,
4030 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004031 goto error;
4032 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004033 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004034 if (prec < 0)
4035 prec = 0;
4036 if (--fmtcnt >= 0)
4037 c = *fmt++;
4038 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004039 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004040 prec = c - '0';
4041 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004042 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004043 if (!isdigit(c))
4044 break;
4045 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004046 PyErr_SetString(
4047 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004048 "prec too big");
4049 goto error;
4050 }
4051 prec = prec*10 + (c - '0');
4052 }
4053 }
4054 } /* prec */
4055 if (fmtcnt >= 0) {
4056 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004057 if (--fmtcnt >= 0)
4058 c = *fmt++;
4059 }
4060 }
4061 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004062 PyErr_SetString(PyExc_ValueError,
4063 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004064 goto error;
4065 }
4066 if (c != '%') {
4067 v = getnextarg(args, arglen, &argidx);
4068 if (v == NULL)
4069 goto error;
4070 }
4071 sign = 0;
4072 fill = ' ';
4073 switch (c) {
4074 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004075 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004076 len = 1;
4077 break;
4078 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004079#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004080 if (PyUnicode_Check(v)) {
4081 fmt = fmt_start;
4082 argidx = argidx_start;
4083 goto unicode;
4084 }
Georg Brandld45014b2005-10-01 17:06:00 +00004085#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004086 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004087#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004088 if (temp != NULL && PyUnicode_Check(temp)) {
4089 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004090 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004091 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004092 goto unicode;
4093 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004094#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004095 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004096 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004097 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004098 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004099 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004100 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004101 if (!PyString_Check(temp)) {
4102 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004103 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004104 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004105 goto error;
4106 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004107 pbuf = PyString_AS_STRING(temp);
4108 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004109 if (prec >= 0 && len > prec)
4110 len = prec;
4111 break;
4112 case 'i':
4113 case 'd':
4114 case 'u':
4115 case 'o':
4116 case 'x':
4117 case 'X':
4118 if (c == 'i')
4119 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004120 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004121 temp = _PyString_FormatLong(v, flags,
4122 prec, c, &pbuf, &len);
4123 if (!temp)
4124 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004125 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004126 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004127 else {
4128 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004129 len = formatint(pbuf,
4130 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004131 flags, prec, c, v);
4132 if (len < 0)
4133 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004134 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004135 }
4136 if (flags & F_ZERO)
4137 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004138 break;
4139 case 'e':
4140 case 'E':
4141 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004142 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004143 case 'g':
4144 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004145 if (c == 'F')
4146 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004147 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004148 len = formatfloat(pbuf, sizeof(formatbuf),
4149 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004150 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004151 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004152 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004153 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004154 fill = '0';
4155 break;
4156 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004157#ifdef Py_USING_UNICODE
4158 if (PyUnicode_Check(v)) {
4159 fmt = fmt_start;
4160 argidx = argidx_start;
4161 goto unicode;
4162 }
4163#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004164 pbuf = formatbuf;
4165 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004166 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004167 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004168 break;
4169 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004170 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004171 "unsupported format character '%c' (0x%x) "
4172 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004173 c, c,
4174 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004175 goto error;
4176 }
4177 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004178 if (*pbuf == '-' || *pbuf == '+') {
4179 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004180 len--;
4181 }
4182 else if (flags & F_SIGN)
4183 sign = '+';
4184 else if (flags & F_BLANK)
4185 sign = ' ';
4186 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004187 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004188 }
4189 if (width < len)
4190 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004191 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004192 reslen -= rescnt;
4193 rescnt = width + fmtcnt + 100;
4194 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004195 if (reslen < 0) {
4196 Py_DECREF(result);
4197 return PyErr_NoMemory();
4198 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004199 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004200 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004201 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004202 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004203 }
4204 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004205 if (fill != ' ')
4206 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004207 rescnt--;
4208 if (width > len)
4209 width--;
4210 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004211 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4212 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004213 assert(pbuf[1] == c);
4214 if (fill != ' ') {
4215 *res++ = *pbuf++;
4216 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004217 }
Tim Petersfff53252001-04-12 18:38:48 +00004218 rescnt -= 2;
4219 width -= 2;
4220 if (width < 0)
4221 width = 0;
4222 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004223 }
4224 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004225 do {
4226 --rescnt;
4227 *res++ = fill;
4228 } while (--width > len);
4229 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004230 if (fill == ' ') {
4231 if (sign)
4232 *res++ = sign;
4233 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004234 (c == 'x' || c == 'X')) {
4235 assert(pbuf[0] == '0');
4236 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004237 *res++ = *pbuf++;
4238 *res++ = *pbuf++;
4239 }
4240 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004241 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004242 res += len;
4243 rescnt -= len;
4244 while (--width >= len) {
4245 --rescnt;
4246 *res++ = ' ';
4247 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004248 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004249 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004250 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004251 goto error;
4252 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004253 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004254 } /* '%' */
4255 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004256 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004257 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004258 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004259 goto error;
4260 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004261 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004262 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004263 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004264 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004265 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004266
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004267#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004268 unicode:
4269 if (args_owned) {
4270 Py_DECREF(args);
4271 args_owned = 0;
4272 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004273 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004274 if (PyTuple_Check(orig_args) && argidx > 0) {
4275 PyObject *v;
4276 int n = PyTuple_GET_SIZE(orig_args) - argidx;
4277 v = PyTuple_New(n);
4278 if (v == NULL)
4279 goto error;
4280 while (--n >= 0) {
4281 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4282 Py_INCREF(w);
4283 PyTuple_SET_ITEM(v, n, w);
4284 }
4285 args = v;
4286 } else {
4287 Py_INCREF(orig_args);
4288 args = orig_args;
4289 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004290 args_owned = 1;
4291 /* Take what we have of the result and let the Unicode formatting
4292 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004293 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004294 if (_PyString_Resize(&result, rescnt))
4295 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004296 fmtcnt = PyString_GET_SIZE(format) - \
4297 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004298 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4299 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004300 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004301 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004302 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004303 if (v == NULL)
4304 goto error;
4305 /* Paste what we have (result) to what the Unicode formatting
4306 function returned (v) and return the result (or error) */
4307 w = PyUnicode_Concat(result, v);
4308 Py_DECREF(result);
4309 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004310 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004311 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004312#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004313
Guido van Rossume5372401993-03-16 12:15:04 +00004314 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004315 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004316 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004317 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004318 }
Guido van Rossume5372401993-03-16 12:15:04 +00004319 return NULL;
4320}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004321
Guido van Rossum2a61e741997-01-18 07:55:05 +00004322void
Fred Drakeba096332000-07-09 07:04:36 +00004323PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004324{
4325 register PyStringObject *s = (PyStringObject *)(*p);
4326 PyObject *t;
4327 if (s == NULL || !PyString_Check(s))
4328 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004329 /* If it's a string subclass, we don't really know what putting
4330 it in the interned dict might do. */
4331 if (!PyString_CheckExact(s))
4332 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004333 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004334 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004335 if (interned == NULL) {
4336 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004337 if (interned == NULL) {
4338 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004339 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004340 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004341 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004342 t = PyDict_GetItem(interned, (PyObject *)s);
4343 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004344 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004345 Py_DECREF(*p);
4346 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004347 return;
4348 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004349
Armin Rigo79f7ad22004-08-07 19:27:39 +00004350 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004351 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004352 return;
4353 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004354 /* The two references in interned are not counted by refcnt.
4355 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004356 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004357 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004358}
4359
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004360void
4361PyString_InternImmortal(PyObject **p)
4362{
4363 PyString_InternInPlace(p);
4364 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4365 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4366 Py_INCREF(*p);
4367 }
4368}
4369
Guido van Rossum2a61e741997-01-18 07:55:05 +00004370
4371PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004372PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004373{
4374 PyObject *s = PyString_FromString(cp);
4375 if (s == NULL)
4376 return NULL;
4377 PyString_InternInPlace(&s);
4378 return s;
4379}
4380
Guido van Rossum8cf04761997-08-02 02:57:45 +00004381void
Fred Drakeba096332000-07-09 07:04:36 +00004382PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004383{
4384 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004385 for (i = 0; i < UCHAR_MAX + 1; i++) {
4386 Py_XDECREF(characters[i]);
4387 characters[i] = NULL;
4388 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004389 Py_XDECREF(nullstring);
4390 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004391}
Barry Warsawa903ad982001-02-23 16:40:48 +00004392
Barry Warsawa903ad982001-02-23 16:40:48 +00004393void _Py_ReleaseInternedStrings(void)
4394{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004395 PyObject *keys;
4396 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004397 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004398
4399 if (interned == NULL || !PyDict_Check(interned))
4400 return;
4401 keys = PyDict_Keys(interned);
4402 if (keys == NULL || !PyList_Check(keys)) {
4403 PyErr_Clear();
4404 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004405 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004406
4407 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4408 detector, interned strings are not forcibly deallocated; rather, we
4409 give them their stolen references back, and then clear and DECREF
4410 the interned dict. */
4411
4412 fprintf(stderr, "releasing interned strings\n");
4413 n = PyList_GET_SIZE(keys);
4414 for (i = 0; i < n; i++) {
4415 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4416 switch (s->ob_sstate) {
4417 case SSTATE_NOT_INTERNED:
4418 /* XXX Shouldn't happen */
4419 break;
4420 case SSTATE_INTERNED_IMMORTAL:
4421 s->ob_refcnt += 1;
4422 break;
4423 case SSTATE_INTERNED_MORTAL:
4424 s->ob_refcnt += 2;
4425 break;
4426 default:
4427 Py_FatalError("Inconsistent interned string state.");
4428 }
4429 s->ob_sstate = SSTATE_NOT_INTERNED;
4430 }
4431 Py_DECREF(keys);
4432 PyDict_Clear(interned);
4433 Py_DECREF(interned);
4434 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004435}