blob: 32aacf5a66bfa01d003e55f5be861cf1817be0b2 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
19 Another way to look at this is that to say that the actual reference
20 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), `size'
   denotes the number of characters to allocate, not counting any null
   terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000157 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000184 /* likewise for %zd */
185 if (*f == 'z' && *(f+1) == 'd')
186 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000187
Barry Warsawdadace02001-08-24 18:32:06 +0000188 switch (*f) {
189 case 'c':
190 (void)va_arg(count, int);
191 /* fall through... */
192 case '%':
193 n++;
194 break;
195 case 'd': case 'i': case 'x':
196 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000197 /* 20 bytes is enough to hold a 64-bit
198 integer. Decimal takes the most space.
199 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000200 n += 20;
201 break;
202 case 's':
203 s = va_arg(count, char*);
204 n += strlen(s);
205 break;
206 case 'p':
207 (void) va_arg(count, int);
208 /* maximum 64-bit pointer representation:
209 * 0xffffffffffffffff
210 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000211 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000212 */
213 n += 19;
214 break;
215 default:
216 /* if we stumble upon an unknown
217 formatting code, copy the rest of
218 the format string to the output
219 string. (we cannot just skip the
220 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000221 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000222 n += strlen(p);
223 goto expand;
224 }
225 } else
226 n++;
227 }
228 expand:
229 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000230 /* Since we've analyzed how much space we need for the worst case,
231 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 string = PyString_FromStringAndSize(NULL, n);
233 if (!string)
234 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000235
Barry Warsawdadace02001-08-24 18:32:06 +0000236 s = PyString_AsString(string);
237
238 for (f = format; *f; f++) {
239 if (*f == '%') {
240 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 Py_ssize_t i;
242 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000243 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 /* handle the size_t flag. */
264 if (*f == 'z' && *(f+1) == 'd') {
265 size_tflag = 1;
266 ++f;
267 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000268
Barry Warsawdadace02001-08-24 18:32:06 +0000269 switch (*f) {
270 case 'c':
271 *s++ = va_arg(vargs, int);
272 break;
273 case 'd':
274 if (longflag)
275 sprintf(s, "%ld", va_arg(vargs, long));
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000276 else if (size_tflag) {
277 /* Instead of checking whether the C
278 library supports %zd, handle the
279 common cases. */
280 #if SIZEOF_SIZE_T == SIZEOF_LONG
281 sprintf(s, "%ld", va_arg(vargs, long));
282 #elif defined(MS_WINDOWS)
283 sprintf(s, "%Id", va_arg(vargs, size_t));
284 #else
285 #error Cannot print size_t values
286 #endif
287 }
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
289 sprintf(s, "%d", va_arg(vargs, int));
290 s += strlen(s);
291 break;
292 case 'i':
293 sprintf(s, "%i", va_arg(vargs, int));
294 s += strlen(s);
295 break;
296 case 'x':
297 sprintf(s, "%x", va_arg(vargs, int));
298 s += strlen(s);
299 break;
300 case 's':
301 p = va_arg(vargs, char*);
302 i = strlen(p);
303 if (n > 0 && i > n)
304 i = n;
305 memcpy(s, p, i);
306 s += i;
307 break;
308 case 'p':
309 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000310 /* %p is ill-defined: ensure leading 0x. */
311 if (s[1] == 'X')
312 s[1] = 'x';
313 else if (s[1] != 'x') {
314 memmove(s+2, s, strlen(s)+1);
315 s[0] = '0';
316 s[1] = 'x';
317 }
Barry Warsawdadace02001-08-24 18:32:06 +0000318 s += strlen(s);
319 break;
320 case '%':
321 *s++ = '%';
322 break;
323 default:
324 strcpy(s, p);
325 s += strlen(s);
326 goto end;
327 }
328 } else
329 *s++ = *f;
330 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000331
Barry Warsawdadace02001-08-24 18:32:06 +0000332 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000333 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000334 return string;
335}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336
Barry Warsawdadace02001-08-24 18:32:06 +0000337PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000339{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000340 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000341 va_list vargs;
342
343#ifdef HAVE_STDARG_PROTOTYPES
344 va_start(vargs, format);
345#else
346 va_start(vargs);
347#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000348 ret = PyString_FromFormatV(format, vargs);
349 va_end(vargs);
350 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000351}
352
353
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000355 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356 const char *encoding,
357 const char *errors)
358{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000359 PyObject *v, *str;
360
361 str = PyString_FromStringAndSize(s, size);
362 if (str == NULL)
363 return NULL;
364 v = PyString_AsDecodedString(str, encoding, errors);
365 Py_DECREF(str);
366 return v;
367}
368
369PyObject *PyString_AsDecodedObject(PyObject *str,
370 const char *encoding,
371 const char *errors)
372{
373 PyObject *v;
374
375 if (!PyString_Check(str)) {
376 PyErr_BadArgument();
377 goto onError;
378 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000379
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000380 if (encoding == NULL) {
381#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383#else
384 PyErr_SetString(PyExc_ValueError, "no encoding specified");
385 goto onError;
386#endif
387 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388
389 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000390 v = PyCodec_Decode(str, encoding, errors);
391 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393
394 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000395
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000396 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 return NULL;
398}
399
400PyObject *PyString_AsDecodedString(PyObject *str,
401 const char *encoding,
402 const char *errors)
403{
404 PyObject *v;
405
406 v = PyString_AsDecodedObject(str, encoding, errors);
407 if (v == NULL)
408 goto onError;
409
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000410#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411 /* Convert Unicode to a string using the default encoding */
412 if (PyUnicode_Check(v)) {
413 PyObject *temp = v;
414 v = PyUnicode_AsEncodedString(v, NULL, NULL);
415 Py_DECREF(temp);
416 if (v == NULL)
417 goto onError;
418 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000419#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000420 if (!PyString_Check(v)) {
421 PyErr_Format(PyExc_TypeError,
422 "decoder did not return a string object (type=%.400s)",
423 v->ob_type->tp_name);
424 Py_DECREF(v);
425 goto onError;
426 }
427
428 return v;
429
430 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000431 return NULL;
432}
433
434PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000435 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436 const char *encoding,
437 const char *errors)
438{
439 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000440
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 str = PyString_FromStringAndSize(s, size);
442 if (str == NULL)
443 return NULL;
444 v = PyString_AsEncodedString(str, encoding, errors);
445 Py_DECREF(str);
446 return v;
447}
448
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000449PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000450 const char *encoding,
451 const char *errors)
452{
453 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000454
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 if (!PyString_Check(str)) {
456 PyErr_BadArgument();
457 goto onError;
458 }
459
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000460 if (encoding == NULL) {
461#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000463#else
464 PyErr_SetString(PyExc_ValueError, "no encoding specified");
465 goto onError;
466#endif
467 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468
469 /* Encode via the codec registry */
470 v = PyCodec_Encode(str, encoding, errors);
471 if (v == NULL)
472 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000473
474 return v;
475
476 onError:
477 return NULL;
478}
479
480PyObject *PyString_AsEncodedString(PyObject *str,
481 const char *encoding,
482 const char *errors)
483{
484 PyObject *v;
485
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000486 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000487 if (v == NULL)
488 goto onError;
489
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000490#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 /* Convert Unicode to a string using the default encoding */
492 if (PyUnicode_Check(v)) {
493 PyObject *temp = v;
494 v = PyUnicode_AsEncodedString(v, NULL, NULL);
495 Py_DECREF(temp);
496 if (v == NULL)
497 goto onError;
498 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000499#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000500 if (!PyString_Check(v)) {
501 PyErr_Format(PyExc_TypeError,
502 "encoder did not return a string object (type=%.400s)",
503 v->ob_type->tp_name);
504 Py_DECREF(v);
505 goto onError;
506 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000507
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000508 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 onError:
511 return NULL;
512}
513
Guido van Rossum234f9421993-06-17 12:35:49 +0000514static void
Fred Drakeba096332000-07-09 07:04:36 +0000515string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000516{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000517 switch (PyString_CHECK_INTERNED(op)) {
518 case SSTATE_NOT_INTERNED:
519 break;
520
521 case SSTATE_INTERNED_MORTAL:
522 /* revive dead object temporarily for DelItem */
523 op->ob_refcnt = 3;
524 if (PyDict_DelItem(interned, op) != 0)
525 Py_FatalError(
526 "deletion of interned string failed");
527 break;
528
529 case SSTATE_INTERNED_IMMORTAL:
530 Py_FatalError("Immortal interned string died.");
531
532 default:
533 Py_FatalError("Inconsistent interned string state.");
534 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000535 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000536}
537
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538/* Unescape a backslash-escaped string. If unicode is non-zero,
539 the string is a u-literal. If recode_encoding is non-zero,
540 the string is UTF-8 encoded and should be re-encoded in the
541 specified encoding. */
542
543PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000544 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *recode_encoding)
548{
549 int c;
550 char *p, *buf;
551 const char *end;
552 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000554 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000555 if (v == NULL)
556 return NULL;
557 p = buf = PyString_AsString(v);
558 end = s + len;
559 while (s < end) {
560 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000561 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000562#ifdef Py_USING_UNICODE
563 if (recode_encoding && (*s & 0x80)) {
564 PyObject *u, *w;
565 char *r;
566 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000567 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000568 t = s;
569 /* Decode non-ASCII bytes as UTF-8. */
570 while (t < end && (*t & 0x80)) t++;
571 u = PyUnicode_DecodeUTF8(s, t - s, errors);
572 if(!u) goto failed;
573
574 /* Recode them in target encoding. */
575 w = PyUnicode_AsEncodedString(
576 u, recode_encoding, errors);
577 Py_DECREF(u);
578 if (!w) goto failed;
579
580 /* Append bytes to output buffer. */
581 r = PyString_AsString(w);
582 rn = PyString_Size(w);
583 memcpy(p, r, rn);
584 p += rn;
585 Py_DECREF(w);
586 s = t;
587 } else {
588 *p++ = *s++;
589 }
590#else
591 *p++ = *s++;
592#endif
593 continue;
594 }
595 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000596 if (s==end) {
597 PyErr_SetString(PyExc_ValueError,
598 "Trailing \\ in string");
599 goto failed;
600 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000601 switch (*s++) {
602 /* XXX This assumes ASCII! */
603 case '\n': break;
604 case '\\': *p++ = '\\'; break;
605 case '\'': *p++ = '\''; break;
606 case '\"': *p++ = '\"'; break;
607 case 'b': *p++ = '\b'; break;
608 case 'f': *p++ = '\014'; break; /* FF */
609 case 't': *p++ = '\t'; break;
610 case 'n': *p++ = '\n'; break;
611 case 'r': *p++ = '\r'; break;
612 case 'v': *p++ = '\013'; break; /* VT */
613 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
614 case '0': case '1': case '2': case '3':
615 case '4': case '5': case '6': case '7':
616 c = s[-1] - '0';
617 if ('0' <= *s && *s <= '7') {
618 c = (c<<3) + *s++ - '0';
619 if ('0' <= *s && *s <= '7')
620 c = (c<<3) + *s++ - '0';
621 }
622 *p++ = c;
623 break;
624 case 'x':
625 if (isxdigit(Py_CHARMASK(s[0]))
626 && isxdigit(Py_CHARMASK(s[1]))) {
627 unsigned int x = 0;
628 c = Py_CHARMASK(*s);
629 s++;
630 if (isdigit(c))
631 x = c - '0';
632 else if (islower(c))
633 x = 10 + c - 'a';
634 else
635 x = 10 + c - 'A';
636 x = x << 4;
637 c = Py_CHARMASK(*s);
638 s++;
639 if (isdigit(c))
640 x += c - '0';
641 else if (islower(c))
642 x += 10 + c - 'a';
643 else
644 x += 10 + c - 'A';
645 *p++ = x;
646 break;
647 }
648 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000649 PyErr_SetString(PyExc_ValueError,
650 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000651 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 }
653 if (strcmp(errors, "replace") == 0) {
654 *p++ = '?';
655 } else if (strcmp(errors, "ignore") == 0)
656 /* do nothing */;
657 else {
658 PyErr_Format(PyExc_ValueError,
659 "decoding error; "
660 "unknown error handling code: %.400s",
661 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000662 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664#ifndef Py_USING_UNICODE
665 case 'u':
666 case 'U':
667 case 'N':
668 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000669 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000670 "Unicode escapes not legal "
671 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000672 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 }
674#endif
675 default:
676 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000677 s--;
678 goto non_esc; /* an arbitry number of unescaped
679 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000680 }
681 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000682 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000683 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 return v;
685 failed:
686 Py_DECREF(v);
687 return NULL;
688}
689
Martin v. Löwis18e16552006-02-15 17:27:45 +0000690static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000691string_getsize(register PyObject *op)
692{
693 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000694 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000695 if (PyString_AsStringAndSize(op, &s, &len))
696 return -1;
697 return len;
698}
699
700static /*const*/ char *
701string_getbuffer(register PyObject *op)
702{
703 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000704 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705 if (PyString_AsStringAndSize(op, &s, &len))
706 return NULL;
707 return s;
708}
709
Martin v. Löwis18e16552006-02-15 17:27:45 +0000710Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000711PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000713 if (!PyString_Check(op))
714 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000715 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716}
717
718/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000719PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000720{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (!PyString_Check(op))
722 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000723 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724}
725
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726int
727PyString_AsStringAndSize(register PyObject *obj,
728 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000729 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730{
731 if (s == NULL) {
732 PyErr_BadInternalCall();
733 return -1;
734 }
735
736 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000737#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000738 if (PyUnicode_Check(obj)) {
739 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
740 if (obj == NULL)
741 return -1;
742 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000743 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#endif
745 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000746 PyErr_Format(PyExc_TypeError,
747 "expected string or Unicode object, "
748 "%.200s found", obj->ob_type->tp_name);
749 return -1;
750 }
751 }
752
753 *s = PyString_AS_STRING(obj);
754 if (len != NULL)
755 *len = PyString_GET_SIZE(obj);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000756 else if (strlen(*s) != PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000757 PyErr_SetString(PyExc_TypeError,
758 "expected string without null bytes");
759 return -1;
760 }
761 return 0;
762}
763
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000764/* Methods */
765
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000766static int
Fred Drakeba096332000-07-09 07:04:36 +0000767string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000769 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000772
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000773 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000774 if (! PyString_CheckExact(op)) {
775 int ret;
776 /* A str subclass may have its own __str__ method. */
777 op = (PyStringObject *) PyObject_Str((PyObject *)op);
778 if (op == NULL)
779 return -1;
780 ret = string_print(op, fp, flags);
781 Py_DECREF(op);
782 return ret;
783 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000784 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000785#ifdef __VMS
786 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
787#else
788 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
789#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000792
Thomas Wouters7e474022000-07-16 12:04:32 +0000793 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000795 if (memchr(op->ob_sval, '\'', op->ob_size) &&
796 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000797 quote = '"';
798
799 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800 for (i = 0; i < op->ob_size; i++) {
801 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000802 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000803 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000804 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000805 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000806 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000807 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000808 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000809 fprintf(fp, "\\r");
810 else if (c < ' ' || c >= 0x7f)
811 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000812 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000813 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000816 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817}
818
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000819PyObject *
820PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000821{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000822 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000823 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000824 PyObject *v;
825 if (newsize > INT_MAX) {
826 PyErr_SetString(PyExc_OverflowError,
827 "string is too large to make repr");
828 }
829 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000831 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 }
833 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000834 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 register char c;
836 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 int quote;
838
Thomas Wouters7e474022000-07-16 12:04:32 +0000839 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000840 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841 if (smartquotes &&
842 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000843 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000844 quote = '"';
845
Tim Peters9161c8b2001-12-03 01:55:38 +0000846 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000847 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000849 /* There's at least enough room for a hex escape
850 and a closing quote. */
851 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000853 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000855 else if (c == '\t')
856 *p++ = '\\', *p++ = 't';
857 else if (c == '\n')
858 *p++ = '\\', *p++ = 'n';
859 else if (c == '\r')
860 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000861 else if (c < ' ' || c >= 0x7f) {
862 /* For performance, we don't want to call
863 PyOS_snprintf here (extra layers of
864 function call). */
865 sprintf(p, "\\x%02x", c & 0xff);
866 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000867 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000868 else
869 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000871 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000872 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000874 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000875 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000876 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878}
879
Guido van Rossum189f1df2001-05-01 16:51:53 +0000880static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000881string_repr(PyObject *op)
882{
883 return PyString_Repr(op, 1);
884}
885
886static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000887string_str(PyObject *s)
888{
Tim Petersc9933152001-10-16 20:18:24 +0000889 assert(PyString_Check(s));
890 if (PyString_CheckExact(s)) {
891 Py_INCREF(s);
892 return s;
893 }
894 else {
895 /* Subtype -- return genuine string with the same value. */
896 PyStringObject *t = (PyStringObject *) s;
897 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
898 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000899}
900
Martin v. Löwis18e16552006-02-15 17:27:45 +0000901static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000902string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903{
904 return a->ob_size;
905}
906
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000908string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000910 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000911 register PyStringObject *op;
912 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000913#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000914 if (PyUnicode_Check(bb))
915 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000916#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000917 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000918 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000919 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920 return NULL;
921 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000924 if ((a->ob_size == 0 || b->ob_size == 0) &&
925 PyString_CheckExact(a) && PyString_CheckExact(b)) {
926 if (a->ob_size == 0) {
927 Py_INCREF(bb);
928 return bb;
929 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 Py_INCREF(a);
931 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000932 }
933 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000934 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000935 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000936 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000937 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000939 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000940 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000941 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000942 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
943 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000944 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946#undef b
947}
948
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000949static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000950string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000951{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000952 register Py_ssize_t i;
953 register Py_ssize_t j;
954 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000956 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 if (n < 0)
958 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000959 /* watch out for overflows: the size can overflow int,
960 * and the # of bytes needed can overflow size_t
961 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000962 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000963 if (n && size / n != a->ob_size) {
964 PyErr_SetString(PyExc_OverflowError,
965 "repeated string is too long");
966 return NULL;
967 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000968 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969 Py_INCREF(a);
970 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971 }
Tim Peterse7c05322004-06-27 17:24:49 +0000972 nbytes = (size_t)size;
973 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000974 PyErr_SetString(PyExc_OverflowError,
975 "repeated string is too long");
976 return NULL;
977 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000978 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000979 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000980 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000982 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000983 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000984 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000985 op->ob_sval[size] = '\0';
986 if (a->ob_size == 1 && n > 0) {
987 memset(op->ob_sval, a->ob_sval[0] , n);
988 return (PyObject *) op;
989 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000990 i = 0;
991 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000992 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
993 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000994 }
995 while (i < size) {
996 j = (i <= size-i) ? i : size-i;
997 memcpy(op->ob_sval+i, op->ob_sval, j);
998 i += j;
999 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001000 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001}
1002
1003/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001006string_slice(register PyStringObject *a, register Py_ssize_t i,
1007 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001008 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001009{
1010 if (i < 0)
1011 i = 0;
1012 if (j < 0)
1013 j = 0; /* Avoid signed/unsigned bug in next line */
1014 if (j > a->ob_size)
1015 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001016 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1017 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001018 Py_INCREF(a);
1019 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001020 }
1021 if (j < i)
1022 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001023 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001024}
1025
Guido van Rossum9284a572000-03-07 15:53:43 +00001026static int
Fred Drakeba096332000-07-09 07:04:36 +00001027string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001028{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001029 char *s = PyString_AS_STRING(a);
1030 const char *sub = PyString_AS_STRING(el);
1031 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001032 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001033 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001034 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001035
1036 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001037#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001038 if (PyUnicode_Check(el))
1039 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001040#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001041 if (!PyString_Check(el)) {
1042 PyErr_SetString(PyExc_TypeError,
1043 "'in <string>' requires string as left operand");
1044 return -1;
1045 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001046 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001047
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001048 if (len_sub == 0)
1049 return 1;
1050 /* last points to one char beyond the start of the rightmost
1051 substring. When s<last, there is still room for a possible match
1052 and s[0] through s[len_sub-1] will be in bounds.
1053 shortsub is len_sub minus the last character which is checked
1054 separately just before the memcmp(). That check helps prevent
1055 false starts and saves the setup time for memcmp().
1056 */
1057 firstchar = sub[0];
1058 shortsub = len_sub - 1;
1059 lastchar = sub[shortsub];
1060 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1061 while (s < last) {
1062 s = memchr(s, firstchar, last-s);
1063 if (s == NULL)
1064 return 0;
1065 assert(s < last);
1066 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001067 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001068 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001069 }
1070 return 0;
1071}
1072
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001073static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001074string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001075{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001076 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001077 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001079 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001080 return NULL;
1081 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001082 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001083 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001084 if (v == NULL)
1085 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001086 else {
1087#ifdef COUNT_ALLOCS
1088 one_strings++;
1089#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001090 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001091 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001092 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093}
1094
Martin v. Löwiscd353062001-05-24 16:56:35 +00001095static PyObject*
1096string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001098 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001099 Py_ssize_t len_a, len_b;
1100 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001101 PyObject *result;
1102
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001103 /* Make sure both arguments are strings. */
1104 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001105 result = Py_NotImplemented;
1106 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001107 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108 if (a == b) {
1109 switch (op) {
1110 case Py_EQ:case Py_LE:case Py_GE:
1111 result = Py_True;
1112 goto out;
1113 case Py_NE:case Py_LT:case Py_GT:
1114 result = Py_False;
1115 goto out;
1116 }
1117 }
1118 if (op == Py_EQ) {
1119 /* Supporting Py_NE here as well does not save
1120 much time, since Py_NE is rarely used. */
1121 if (a->ob_size == b->ob_size
1122 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001123 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001124 a->ob_size) == 0)) {
1125 result = Py_True;
1126 } else {
1127 result = Py_False;
1128 }
1129 goto out;
1130 }
1131 len_a = a->ob_size; len_b = b->ob_size;
1132 min_len = (len_a < len_b) ? len_a : len_b;
1133 if (min_len > 0) {
1134 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1135 if (c==0)
1136 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1137 }else
1138 c = 0;
1139 if (c == 0)
1140 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1141 switch (op) {
1142 case Py_LT: c = c < 0; break;
1143 case Py_LE: c = c <= 0; break;
1144 case Py_EQ: assert(0); break; /* unreachable */
1145 case Py_NE: c = c != 0; break;
1146 case Py_GT: c = c > 0; break;
1147 case Py_GE: c = c >= 0; break;
1148 default:
1149 result = Py_NotImplemented;
1150 goto out;
1151 }
1152 result = c ? Py_True : Py_False;
1153 out:
1154 Py_INCREF(result);
1155 return result;
1156}
1157
1158int
1159_PyString_Eq(PyObject *o1, PyObject *o2)
1160{
1161 PyStringObject *a, *b;
1162 a = (PyStringObject*)o1;
1163 b = (PyStringObject*)o2;
1164 return a->ob_size == b->ob_size
1165 && *a->ob_sval == *b->ob_sval
1166 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001167}
1168
Guido van Rossum9bfef441993-03-29 10:43:31 +00001169static long
Fred Drakeba096332000-07-09 07:04:36 +00001170string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001171{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001172 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001173 register unsigned char *p;
1174 register long x;
1175
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001176 if (a->ob_shash != -1)
1177 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001178 len = a->ob_size;
1179 p = (unsigned char *) a->ob_sval;
1180 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001181 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001182 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001183 x ^= a->ob_size;
1184 if (x == -1)
1185 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001186 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001187 return x;
1188}
1189
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001190#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1191
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001192static PyObject*
1193string_subscript(PyStringObject* self, PyObject* item)
1194{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001195 PyNumberMethods *nb = item->ob_type->tp_as_number;
1196 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1197 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001198 if (i == -1 && PyErr_Occurred())
1199 return NULL;
1200 if (i < 0)
1201 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001202 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001203 }
1204 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001205 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206 char* source_buf;
1207 char* result_buf;
1208 PyObject* result;
1209
1210 if (PySlice_GetIndicesEx((PySliceObject*)item,
1211 PyString_GET_SIZE(self),
1212 &start, &stop, &step, &slicelength) < 0) {
1213 return NULL;
1214 }
1215
1216 if (slicelength <= 0) {
1217 return PyString_FromStringAndSize("", 0);
1218 }
1219 else {
1220 source_buf = PyString_AsString((PyObject*)self);
1221 result_buf = PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001222 if (result_buf == NULL)
1223 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001224
1225 for (cur = start, i = 0; i < slicelength;
1226 cur += step, i++) {
1227 result_buf[i] = source_buf[cur];
1228 }
1229
1230 result = PyString_FromStringAndSize(result_buf,
1231 slicelength);
1232 PyMem_Free(result_buf);
1233 return result;
1234 }
1235 }
1236 else {
1237 PyErr_SetString(PyExc_TypeError,
1238 "string indices must be integers");
1239 return NULL;
1240 }
1241}
1242
Martin v. Löwis18e16552006-02-15 17:27:45 +00001243static Py_ssize_t
1244string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001245{
1246 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001247 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001248 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001249 return -1;
1250 }
1251 *ptr = (void *)self->ob_sval;
1252 return self->ob_size;
1253}
1254
Martin v. Löwis18e16552006-02-15 17:27:45 +00001255static Py_ssize_t
1256string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257{
Guido van Rossum045e6881997-09-08 18:30:11 +00001258 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001259 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001260 return -1;
1261}
1262
Martin v. Löwis18e16552006-02-15 17:27:45 +00001263static Py_ssize_t
1264string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001265{
1266 if ( lenp )
1267 *lenp = self->ob_size;
1268 return 1;
1269}
1270
Martin v. Löwis18e16552006-02-15 17:27:45 +00001271static Py_ssize_t
1272string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001273{
1274 if ( index != 0 ) {
1275 PyErr_SetString(PyExc_SystemError,
1276 "accessing non-existent string segment");
1277 return -1;
1278 }
1279 *ptr = self->ob_sval;
1280 return self->ob_size;
1281}
1282
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001283static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001285 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001286 (ssizeargfunc)string_repeat, /*sq_repeat*/
1287 (ssizeargfunc)string_item, /*sq_item*/
1288 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001289 0, /*sq_ass_item*/
1290 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001291 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001292};
1293
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001294static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001295 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001296 (binaryfunc)string_subscript,
1297 0,
1298};
1299
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001300static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001301 (readbufferproc)string_buffer_getreadbuf,
1302 (writebufferproc)string_buffer_getwritebuf,
1303 (segcountproc)string_buffer_getsegcount,
1304 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001305};
1306
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001307
1308
/* Strip-kind selectors; each is also an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings, one per selector above, in selector order. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string with its 3-character
   "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001317
/* Append the bytes data[left:right] to `list` as a new string object.
   The enclosing function must declare `PyObject *str` and `PyObject *list`
   and provide an `onError` label, which is jumped to on any failure.
   Wrapped in do { } while (0) so that the macro expands to exactly one
   statement and is safe as the body of an unbraced if/else; invoke it
   with a trailing semicolon. */
#define SPLIT_APPEND(data, left, right) \
    do { \
        str = PyString_FromStringAndSize((data) + (left), \
                                         (right) - (left)); \
        if (str == NULL) \
            goto onError; \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        Py_DECREF(str); \
    } while (0)
1329
/* Insert the bytes data[left:right] at the front of `list` as a new
   string object.  The enclosing function must declare `PyObject *str` and
   `PyObject *list` and provide an `onError` label, which is jumped to on
   any failure.  Wrapped in do { } while (0) so that the macro expands to
   exactly one statement and is safe as the body of an unbraced if/else;
   invoke it with a trailing semicolon. */
#define SPLIT_INSERT(data, left, right) \
    do { \
        str = PyString_FromStringAndSize((data) + (left), \
                                         (right) - (left)); \
        if (str == NULL) \
            goto onError; \
        if (PyList_Insert(list, 0, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        Py_DECREF(str); \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341
1342static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001343split_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001345 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001346 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347 PyObject *list = PyList_New(0);
1348
1349 if (list == NULL)
1350 return NULL;
1351
Guido van Rossum4c08d552000-03-10 22:55:18 +00001352 for (i = j = 0; i < len; ) {
1353 while (i < len && isspace(Py_CHARMASK(s[i])))
1354 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001355 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001356 while (i < len && !isspace(Py_CHARMASK(s[i])))
1357 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 if (maxsplit-- <= 0)
1360 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001361 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001362 while (i < len && isspace(Py_CHARMASK(s[i])))
1363 i++;
1364 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365 }
1366 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001367 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001368 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001369 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001370 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001371 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372 Py_DECREF(list);
1373 return NULL;
1374}
1375
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001376static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001377split_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001378{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001379 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001380 PyObject *str;
1381 PyObject *list = PyList_New(0);
1382
1383 if (list == NULL)
1384 return NULL;
1385
1386 for (i = j = 0; i < len; ) {
1387 if (s[i] == ch) {
1388 if (maxcount-- <= 0)
1389 break;
1390 SPLIT_APPEND(s, j, i);
1391 i = j = i + 1;
1392 } else
1393 i++;
1394 }
1395 if (j <= len) {
1396 SPLIT_APPEND(s, j, len);
1397 }
1398 return list;
1399
1400 onError:
1401 Py_DECREF(list);
1402 return NULL;
1403}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001405PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406"S.split([sep [,maxsplit]]) -> list of strings\n\
1407\n\
1408Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001409delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001410splits are done. If sep is not specified or is None, any\n\
1411whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412
1413static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001414string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001416 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1417 int err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001418 int maxsplit = -1;
1419 const char *s = PyString_AS_STRING(self), *sub;
1420 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421
Guido van Rossum4c08d552000-03-10 22:55:18 +00001422 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001424 if (maxsplit < 0)
1425 maxsplit = INT_MAX;
1426 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001427 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001428 if (PyString_Check(subobj)) {
1429 sub = PyString_AS_STRING(subobj);
1430 n = PyString_GET_SIZE(subobj);
1431 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001432#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001433 else if (PyUnicode_Check(subobj))
1434 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001435#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001436 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1437 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001438
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 if (n == 0) {
1440 PyErr_SetString(PyExc_ValueError, "empty separator");
1441 return NULL;
1442 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001443 else if (n == 1)
1444 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001445
1446 list = PyList_New(0);
1447 if (list == NULL)
1448 return NULL;
1449
1450 i = j = 0;
1451 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001452 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001453 if (maxsplit-- <= 0)
1454 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001455 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001456 if (item == NULL)
1457 goto fail;
1458 err = PyList_Append(list, item);
1459 Py_DECREF(item);
1460 if (err < 0)
1461 goto fail;
1462 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001463 }
1464 else
1465 i++;
1466 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001467 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001468 if (item == NULL)
1469 goto fail;
1470 err = PyList_Append(list, item);
1471 Py_DECREF(item);
1472 if (err < 0)
1473 goto fail;
1474
1475 return list;
1476
1477 fail:
1478 Py_DECREF(list);
1479 return NULL;
1480}
1481
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001482static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001483rsplit_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001484{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001485 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001487 PyObject *list = PyList_New(0);
1488
1489 if (list == NULL)
1490 return NULL;
1491
1492 for (i = j = len - 1; i >= 0; ) {
1493 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1494 i--;
1495 j = i;
1496 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1497 i--;
1498 if (j > i) {
1499 if (maxsplit-- <= 0)
1500 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001501 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001502 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1503 i--;
1504 j = i;
1505 }
1506 }
1507 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001508 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001509 }
1510 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001511 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001512 Py_DECREF(list);
1513 return NULL;
1514}
1515
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001516static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001517rsplit_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001518{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001519 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001520 PyObject *str;
1521 PyObject *list = PyList_New(0);
1522
1523 if (list == NULL)
1524 return NULL;
1525
1526 for (i = j = len - 1; i >= 0; ) {
1527 if (s[i] == ch) {
1528 if (maxcount-- <= 0)
1529 break;
1530 SPLIT_INSERT(s, i + 1, j + 1);
1531 j = i = i - 1;
1532 } else
1533 i--;
1534 }
1535 if (j >= -1) {
1536 SPLIT_INSERT(s, 0, j + 1);
1537 }
1538 return list;
1539
1540 onError:
1541 Py_DECREF(list);
1542 return NULL;
1543}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001544
1545PyDoc_STRVAR(rsplit__doc__,
1546"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1547\n\
1548Return a list of the words in the string S, using sep as the\n\
1549delimiter string, starting at the end of the string and working\n\
1550to the front. If maxsplit is given, at most maxsplit splits are\n\
1551done. If sep is not specified or is None, any whitespace string\n\
1552is a separator.");
1553
1554static PyObject *
1555string_rsplit(PyStringObject *self, PyObject *args)
1556{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001557 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1558 int err;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001559 int maxsplit = -1;
1560 const char *s = PyString_AS_STRING(self), *sub;
1561 PyObject *list, *item, *subobj = Py_None;
1562
1563 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1564 return NULL;
1565 if (maxsplit < 0)
1566 maxsplit = INT_MAX;
1567 if (subobj == Py_None)
1568 return rsplit_whitespace(s, len, maxsplit);
1569 if (PyString_Check(subobj)) {
1570 sub = PyString_AS_STRING(subobj);
1571 n = PyString_GET_SIZE(subobj);
1572 }
1573#ifdef Py_USING_UNICODE
1574 else if (PyUnicode_Check(subobj))
1575 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1576#endif
1577 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1578 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001579
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001580 if (n == 0) {
1581 PyErr_SetString(PyExc_ValueError, "empty separator");
1582 return NULL;
1583 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001584 else if (n == 1)
1585 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001586
1587 list = PyList_New(0);
1588 if (list == NULL)
1589 return NULL;
1590
1591 j = len;
1592 i = j - n;
1593 while (i >= 0) {
1594 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1595 if (maxsplit-- <= 0)
1596 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001597 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001598 if (item == NULL)
1599 goto fail;
1600 err = PyList_Insert(list, 0, item);
1601 Py_DECREF(item);
1602 if (err < 0)
1603 goto fail;
1604 j = i;
1605 i -= n;
1606 }
1607 else
1608 i--;
1609 }
1610 item = PyString_FromStringAndSize(s, j);
1611 if (item == NULL)
1612 goto fail;
1613 err = PyList_Insert(list, 0, item);
1614 Py_DECREF(item);
1615 if (err < 0)
1616 goto fail;
1617
1618 return list;
1619
1620 fail:
1621 Py_DECREF(list);
1622 return NULL;
1623}
1624
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001626PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627"S.join(sequence) -> string\n\
1628\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001629Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001630sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631
1632static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001633string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634{
1635 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001636 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001637 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001638 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001639 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001640 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001641 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001642 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001643
Tim Peters19fe14e2001-01-19 03:03:47 +00001644 seq = PySequence_Fast(orig, "");
1645 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001646 return NULL;
1647 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001648
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001649 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001650 if (seqlen == 0) {
1651 Py_DECREF(seq);
1652 return PyString_FromString("");
1653 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001655 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001656 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1657 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001658 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001659 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001660 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001661 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001662
Raymond Hettinger674f2412004-08-23 23:23:54 +00001663 /* There are at least two things to join, or else we have a subclass
1664 * of the builtin types in the sequence.
1665 * Do a pre-pass to figure out the total amount of space we'll
1666 * need (sz), see whether any argument is absurd, and defer to
1667 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001668 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001669 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001670 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001671 item = PySequence_Fast_GET_ITEM(seq, i);
1672 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001673#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001674 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001675 /* Defer to Unicode join.
1676 * CAUTION: There's no gurantee that the
1677 * original sequence can be iterated over
1678 * again, so we must pass seq here.
1679 */
1680 PyObject *result;
1681 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001682 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001683 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001684 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001685#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001686 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001687 "sequence item %i: expected string,"
1688 " %.80s found",
Martin v. Löwis18e16552006-02-15 17:27:45 +00001689 /*XXX*/(int)i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001690 Py_DECREF(seq);
1691 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001692 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001693 sz += PyString_GET_SIZE(item);
1694 if (i != 0)
1695 sz += seplen;
1696 if (sz < old_sz || sz > INT_MAX) {
1697 PyErr_SetString(PyExc_OverflowError,
1698 "join() is too long for a Python string");
1699 Py_DECREF(seq);
1700 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001701 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001702 }
1703
1704 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001705 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001706 if (res == NULL) {
1707 Py_DECREF(seq);
1708 return NULL;
1709 }
1710
1711 /* Catenate everything. */
1712 p = PyString_AS_STRING(res);
1713 for (i = 0; i < seqlen; ++i) {
1714 size_t n;
1715 item = PySequence_Fast_GET_ITEM(seq, i);
1716 n = PyString_GET_SIZE(item);
1717 memcpy(p, PyString_AS_STRING(item), n);
1718 p += n;
1719 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001720 memcpy(p, sep, seplen);
1721 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001722 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001724
Jeremy Hylton49048292000-07-11 03:28:17 +00001725 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001726 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727}
1728
Tim Peters52e155e2001-06-16 05:42:57 +00001729PyObject *
1730_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001731{
Tim Petersa7259592001-06-16 05:11:17 +00001732 assert(sep != NULL && PyString_Check(sep));
1733 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001734 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001735}
1736
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001737static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001738string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001739{
1740 if (*end > len)
1741 *end = len;
1742 else if (*end < 0)
1743 *end += len;
1744 if (*end < 0)
1745 *end = 0;
1746 if (*start < 0)
1747 *start += len;
1748 if (*start < 0)
1749 *start = 0;
1750}
1751
Martin v. Löwis18e16552006-02-15 17:27:45 +00001752static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001753string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001756 Py_ssize_t len = PyString_GET_SIZE(self);
1757 Py_ssize_t n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001758 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759
Martin v. Löwis18e16552006-02-15 17:27:45 +00001760 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001761 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001762 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001763 return -2;
1764 if (PyString_Check(subobj)) {
1765 sub = PyString_AS_STRING(subobj);
1766 n = PyString_GET_SIZE(subobj);
1767 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001768#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001769 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001770 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001771#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001772 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773 return -2;
1774
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001775 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776
Guido van Rossum4c08d552000-03-10 22:55:18 +00001777 if (dir > 0) {
1778 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001780 last -= n;
1781 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001782 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001783 return (long)i;
1784 }
1785 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001786 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001787
Guido van Rossum4c08d552000-03-10 22:55:18 +00001788 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001789 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001790 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001791 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001792 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001793 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001794
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795 return -1;
1796}
1797
1798
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001799PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800"S.find(sub [,start [,end]]) -> int\n\
1801\n\
1802Return the lowest index in S where substring sub is found,\n\
1803such that sub is contained within s[start,end]. Optional\n\
1804arguments start and end are interpreted as in slice notation.\n\
1805\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001806Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807
1808static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001809string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001811 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812 if (result == -2)
1813 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001814 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815}
1816
1817
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001818PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819"S.index(sub [,start [,end]]) -> int\n\
1820\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001821Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822
1823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001824string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001825{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001826 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 if (result == -2)
1828 return NULL;
1829 if (result == -1) {
1830 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001831 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832 return NULL;
1833 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001834 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835}
1836
1837
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001838PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839"S.rfind(sub [,start [,end]]) -> int\n\
1840\n\
1841Return the highest index in S where substring sub is found,\n\
1842such that sub is contained within s[start,end]. Optional\n\
1843arguments start and end are interpreted as in slice notation.\n\
1844\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001845Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846
1847static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001848string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001850 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851 if (result == -2)
1852 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001853 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001854}
1855
1856
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001857PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858"S.rindex(sub [,start [,end]]) -> int\n\
1859\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001860Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861
1862static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001863string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001865 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866 if (result == -2)
1867 return NULL;
1868 if (result == -1) {
1869 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001870 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001871 return NULL;
1872 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001873 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001874}
1875
1876
1877static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001878do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1879{
1880 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001881 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001882 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001883 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1884 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001885
1886 i = 0;
1887 if (striptype != RIGHTSTRIP) {
1888 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1889 i++;
1890 }
1891 }
1892
1893 j = len;
1894 if (striptype != LEFTSTRIP) {
1895 do {
1896 j--;
1897 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1898 j++;
1899 }
1900
1901 if (i == 0 && j == len && PyString_CheckExact(self)) {
1902 Py_INCREF(self);
1903 return (PyObject*)self;
1904 }
1905 else
1906 return PyString_FromStringAndSize(s+i, j-i);
1907}
1908
1909
1910static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001911do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912{
1913 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001914 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916 i = 0;
1917 if (striptype != RIGHTSTRIP) {
1918 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1919 i++;
1920 }
1921 }
1922
1923 j = len;
1924 if (striptype != LEFTSTRIP) {
1925 do {
1926 j--;
1927 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1928 j++;
1929 }
1930
Tim Peters8fa5dd02001-09-12 02:18:30 +00001931 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932 Py_INCREF(self);
1933 return (PyObject*)self;
1934 }
1935 else
1936 return PyString_FromStringAndSize(s+i, j-i);
1937}
1938
1939
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001940static PyObject *
1941do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1942{
1943 PyObject *sep = NULL;
1944
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001945 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001946 return NULL;
1947
1948 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001949 if (PyString_Check(sep))
1950 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001951#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001952 else if (PyUnicode_Check(sep)) {
1953 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1954 PyObject *res;
1955 if (uniself==NULL)
1956 return NULL;
1957 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1958 striptype, sep);
1959 Py_DECREF(uniself);
1960 return res;
1961 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001962#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001963 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001964 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001965#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001966 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001967#else
1968 "%s arg must be None or str",
1969#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001970 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001971 return NULL;
1972 }
1973 return do_xstrip(self, striptype, sep);
1974 }
1975
1976 return do_strip(self, striptype);
1977}
1978
1979
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001980PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001981"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982\n\
1983Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001984whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001985If chars is given and not None, remove characters in chars instead.\n\
1986If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987
1988static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001989string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001991 if (PyTuple_GET_SIZE(args) == 0)
1992 return do_strip(self, BOTHSTRIP); /* Common case */
1993 else
1994 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995}
1996
1997
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001998PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001999"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002000\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002001Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002002If chars is given and not None, remove characters in chars instead.\n\
2003If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004
2005static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002006string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002007{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002008 if (PyTuple_GET_SIZE(args) == 0)
2009 return do_strip(self, LEFTSTRIP); /* Common case */
2010 else
2011 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012}
2013
2014
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002015PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002016"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002018Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002019If chars is given and not None, remove characters in chars instead.\n\
2020If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021
2022static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002023string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002025 if (PyTuple_GET_SIZE(args) == 0)
2026 return do_strip(self, RIGHTSTRIP); /* Common case */
2027 else
2028 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029}
2030
2031
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002032PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033"S.lower() -> string\n\
2034\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002035Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036
2037static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002038string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039{
2040 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002041 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042 PyObject *new;
2043
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002044 new = PyString_FromStringAndSize(NULL, n);
2045 if (new == NULL)
2046 return NULL;
2047 s_new = PyString_AsString(new);
2048 for (i = 0; i < n; i++) {
2049 int c = Py_CHARMASK(*s++);
2050 if (isupper(c)) {
2051 *s_new = tolower(c);
2052 } else
2053 *s_new = c;
2054 s_new++;
2055 }
2056 return new;
2057}
2058
2059
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061"S.upper() -> string\n\
2062\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002063Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064
2065static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002066string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002067{
2068 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002069 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070 PyObject *new;
2071
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072 new = PyString_FromStringAndSize(NULL, n);
2073 if (new == NULL)
2074 return NULL;
2075 s_new = PyString_AsString(new);
2076 for (i = 0; i < n; i++) {
2077 int c = Py_CHARMASK(*s++);
2078 if (islower(c)) {
2079 *s_new = toupper(c);
2080 } else
2081 *s_new = c;
2082 s_new++;
2083 }
2084 return new;
2085}
2086
2087
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002088PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002089"S.title() -> string\n\
2090\n\
2091Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002092characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002093
2094static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002095string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096{
2097 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002098 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099 int previous_is_cased = 0;
2100 PyObject *new;
2101
Guido van Rossum4c08d552000-03-10 22:55:18 +00002102 new = PyString_FromStringAndSize(NULL, n);
2103 if (new == NULL)
2104 return NULL;
2105 s_new = PyString_AsString(new);
2106 for (i = 0; i < n; i++) {
2107 int c = Py_CHARMASK(*s++);
2108 if (islower(c)) {
2109 if (!previous_is_cased)
2110 c = toupper(c);
2111 previous_is_cased = 1;
2112 } else if (isupper(c)) {
2113 if (previous_is_cased)
2114 c = tolower(c);
2115 previous_is_cased = 1;
2116 } else
2117 previous_is_cased = 0;
2118 *s_new++ = c;
2119 }
2120 return new;
2121}
2122
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002123PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124"S.capitalize() -> string\n\
2125\n\
2126Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002127capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128
2129static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002130string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131{
2132 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002133 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134 PyObject *new;
2135
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136 new = PyString_FromStringAndSize(NULL, n);
2137 if (new == NULL)
2138 return NULL;
2139 s_new = PyString_AsString(new);
2140 if (0 < n) {
2141 int c = Py_CHARMASK(*s++);
2142 if (islower(c))
2143 *s_new = toupper(c);
2144 else
2145 *s_new = c;
2146 s_new++;
2147 }
2148 for (i = 1; i < n; i++) {
2149 int c = Py_CHARMASK(*s++);
2150 if (isupper(c))
2151 *s_new = tolower(c);
2152 else
2153 *s_new = c;
2154 s_new++;
2155 }
2156 return new;
2157}
2158
2159
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002160PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161"S.count(sub[, start[, end]]) -> int\n\
2162\n\
2163Return the number of occurrences of substring sub in string\n\
2164S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002165interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166
2167static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002168string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002170 const char *s = PyString_AS_STRING(self), *sub, *t;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002171 Py_ssize_t len = PyString_GET_SIZE(self), n;
2172 Py_ssize_t i = 0, last = INT_MAX;
2173 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175
Guido van Rossumc6821402000-05-08 14:08:05 +00002176 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2177 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002178 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002179
Guido van Rossum4c08d552000-03-10 22:55:18 +00002180 if (PyString_Check(subobj)) {
2181 sub = PyString_AS_STRING(subobj);
2182 n = PyString_GET_SIZE(subobj);
2183 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002184#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002185 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002186 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002187 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2188 if (count == -1)
2189 return NULL;
2190 else
2191 return PyInt_FromLong((long) count);
2192 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002193#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2195 return NULL;
2196
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002197 string_adjust_indices(&i, &last, len);
2198
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199 m = last + 1 - n;
2200 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002201 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202
2203 r = 0;
2204 while (i < m) {
2205 if (!memcmp(s+i, sub, n)) {
2206 r++;
2207 i += n;
2208 } else {
2209 i++;
2210 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002211 if (i >= m)
2212 break;
2213 t = memchr(s+i, sub[0], m-i);
2214 if (t == NULL)
2215 break;
2216 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002218 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219}
2220
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002221PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222"S.swapcase() -> string\n\
2223\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002225converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226
2227static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002228string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229{
2230 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002231 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232 PyObject *new;
2233
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234 new = PyString_FromStringAndSize(NULL, n);
2235 if (new == NULL)
2236 return NULL;
2237 s_new = PyString_AsString(new);
2238 for (i = 0; i < n; i++) {
2239 int c = Py_CHARMASK(*s++);
2240 if (islower(c)) {
2241 *s_new = toupper(c);
2242 }
2243 else if (isupper(c)) {
2244 *s_new = tolower(c);
2245 }
2246 else
2247 *s_new = c;
2248 s_new++;
2249 }
2250 return new;
2251}
2252
2253
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002254PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255"S.translate(table [,deletechars]) -> string\n\
2256\n\
2257Return a copy of the string S, where all characters occurring\n\
2258in the optional argument deletechars are removed, and the\n\
2259remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002260translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261
2262static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002263string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265 register char *input, *output;
2266 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002267 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002270 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271 PyObject *result;
2272 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002275 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002277 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278
2279 if (PyString_Check(tableobj)) {
2280 table1 = PyString_AS_STRING(tableobj);
2281 tablen = PyString_GET_SIZE(tableobj);
2282 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002283#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002285 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 parameter; instead a mapping to None will cause characters
2287 to be deleted. */
2288 if (delobj != NULL) {
2289 PyErr_SetString(PyExc_TypeError,
2290 "deletions are implemented differently for unicode");
2291 return NULL;
2292 }
2293 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2294 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002295#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002298
Martin v. Löwis00b61272002-12-12 20:03:19 +00002299 if (tablen != 256) {
2300 PyErr_SetString(PyExc_ValueError,
2301 "translation table must be 256 characters long");
2302 return NULL;
2303 }
2304
Guido van Rossum4c08d552000-03-10 22:55:18 +00002305 if (delobj != NULL) {
2306 if (PyString_Check(delobj)) {
2307 del_table = PyString_AS_STRING(delobj);
2308 dellen = PyString_GET_SIZE(delobj);
2309 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002310#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311 else if (PyUnicode_Check(delobj)) {
2312 PyErr_SetString(PyExc_TypeError,
2313 "deletions are implemented differently for unicode");
2314 return NULL;
2315 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002316#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2318 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002319 }
2320 else {
2321 del_table = NULL;
2322 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323 }
2324
2325 table = table1;
2326 inlen = PyString_Size(input_obj);
2327 result = PyString_FromStringAndSize((char *)NULL, inlen);
2328 if (result == NULL)
2329 return NULL;
2330 output_start = output = PyString_AsString(result);
2331 input = PyString_AsString(input_obj);
2332
2333 if (dellen == 0) {
2334 /* If no deletions are required, use faster code */
2335 for (i = inlen; --i >= 0; ) {
2336 c = Py_CHARMASK(*input++);
2337 if (Py_CHARMASK((*output++ = table[c])) != c)
2338 changed = 1;
2339 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002340 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002341 return result;
2342 Py_DECREF(result);
2343 Py_INCREF(input_obj);
2344 return input_obj;
2345 }
2346
2347 for (i = 0; i < 256; i++)
2348 trans_table[i] = Py_CHARMASK(table[i]);
2349
2350 for (i = 0; i < dellen; i++)
2351 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2352
2353 for (i = inlen; --i >= 0; ) {
2354 c = Py_CHARMASK(*input++);
2355 if (trans_table[c] != -1)
2356 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2357 continue;
2358 changed = 1;
2359 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002360 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361 Py_DECREF(result);
2362 Py_INCREF(input_obj);
2363 return input_obj;
2364 }
2365 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002366 if (inlen > 0)
2367 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368 return result;
2369}
2370
2371
2372/* What follows is used for implementing replace(). Perry Stoll. */
2373
2374/*
2375 mymemfind
2376
2377 strstr replacement for arbitrary blocks of memory.
2378
Barry Warsaw51ac5802000-03-20 16:36:48 +00002379 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380 contents of memory pointed to by PAT. Returns the index into MEM if
2381 found, or -1 if not found. If len of PAT is greater than length of
2382 MEM, the function returns -1.
2383*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00002384static Py_ssize_t
2385mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002387 register Py_ssize_t ii;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002388
2389 /* pattern can not occur in the last pat_len-1 chars */
2390 len -= pat_len;
2391
2392 for (ii = 0; ii <= len; ii++) {
Fred Drake396f6e02000-06-20 15:47:54 +00002393 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394 return ii;
2395 }
2396 }
2397 return -1;
2398}
2399
2400/*
2401 mymemcnt
2402
2403 Return the number of distinct times PAT is found in MEM.
2404 meaning mem=1111 and pat==11 returns 2.
2405 mem=11111 and pat==11 also return 2.
2406 */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002407static Py_ssize_t
2408mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002410 register Py_ssize_t offset = 0;
2411 Py_ssize_t nfound = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412
2413 while (len >= 0) {
2414 offset = mymemfind(mem, len, pat, pat_len);
2415 if (offset == -1)
2416 break;
2417 mem += offset + pat_len;
2418 len -= offset + pat_len;
2419 nfound++;
2420 }
2421 return nfound;
2422}
2423
2424/*
2425 mymemreplace
2426
Thomas Wouters7e474022000-07-16 12:04:32 +00002427 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428 replaced with SUB.
2429
Thomas Wouters7e474022000-07-16 12:04:32 +00002430 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002431 of PAT in STR, then the original string is returned. Otherwise, a new
2432 string is allocated here and returned.
2433
2434 on return, out_len is:
2435 the length of output string, or
2436 -1 if the input string is returned, or
2437 unchanged if an error occurs (no memory).
2438
2439 return value is:
2440 the new string allocated locally, or
2441 NULL if an error occurred.
2442*/
2443static char *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002444mymemreplace(const char *str, Py_ssize_t len, /* input string */
2445 const char *pat, Py_ssize_t pat_len, /* pattern string to find */
2446 const char *sub, Py_ssize_t sub_len, /* substitution string */
2447 Py_ssize_t count, /* number of replacements */
2448 Py_ssize_t *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449{
2450 char *out_s;
2451 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002452 Py_ssize_t nfound, offset, new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002453
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002454 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455 goto return_same;
2456
2457 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002458 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002459 if (count < 0)
2460 count = INT_MAX;
2461 else if (nfound > count)
2462 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463 if (nfound == 0)
2464 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002465
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002466 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002467 if (new_len == 0) {
2468 /* Have to allocate something for the caller to free(). */
2469 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002470 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002471 return NULL;
2472 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002473 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002474 else {
2475 assert(new_len > 0);
2476 new_s = (char *)PyMem_MALLOC(new_len);
2477 if (new_s == NULL)
2478 return NULL;
2479 out_s = new_s;
2480
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002481 if (pat_len > 0) {
2482 for (; nfound > 0; --nfound) {
2483 /* find index of next instance of pattern */
2484 offset = mymemfind(str, len, pat, pat_len);
2485 if (offset == -1)
2486 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002487
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002488 /* copy non matching part of input string */
2489 memcpy(new_s, str, offset);
2490 str += offset + pat_len;
2491 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002492
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002493 /* copy substitute into the output string */
2494 new_s += offset;
2495 memcpy(new_s, sub, sub_len);
2496 new_s += sub_len;
2497 }
2498 /* copy any remaining values into output string */
2499 if (len > 0)
2500 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002501 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002502 else {
2503 for (;;++str, --len) {
2504 memcpy(new_s, sub, sub_len);
2505 new_s += sub_len;
2506 if (--nfound <= 0) {
2507 memcpy(new_s, str, len);
2508 break;
2509 }
2510 *new_s++ = *str;
2511 }
2512 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002513 }
2514 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002515 return out_s;
2516
2517 return_same:
2518 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002519 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002520}
2521
2522
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002523PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002524"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002525\n\
2526Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002527old replaced by new. If the optional argument count is\n\
2528given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529
2530static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002531string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002532{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533 const char *str = PyString_AS_STRING(self), *sub, *repl;
2534 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002535 const Py_ssize_t len = PyString_GET_SIZE(self);
2536 Py_ssize_t sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002537 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002538 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002539 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002540
Guido van Rossum4c08d552000-03-10 22:55:18 +00002541 if (!PyArg_ParseTuple(args, "OO|i:replace",
2542 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002543 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544
2545 if (PyString_Check(subobj)) {
2546 sub = PyString_AS_STRING(subobj);
2547 sub_len = PyString_GET_SIZE(subobj);
2548 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002549#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002550 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002551 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002552 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002553#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002554 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2555 return NULL;
2556
2557 if (PyString_Check(replobj)) {
2558 repl = PyString_AS_STRING(replobj);
2559 repl_len = PyString_GET_SIZE(replobj);
2560 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002561#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002562 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002563 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002564 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002565#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002566 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2567 return NULL;
2568
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002570 if (new_s == NULL) {
2571 PyErr_NoMemory();
2572 return NULL;
2573 }
2574 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002575 if (PyString_CheckExact(self)) {
2576 /* we're returning another reference to self */
2577 new = (PyObject*)self;
2578 Py_INCREF(new);
2579 }
2580 else {
2581 new = PyString_FromStringAndSize(str, len);
2582 if (new == NULL)
2583 return NULL;
2584 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002585 }
2586 else {
2587 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002588 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002589 }
2590 return new;
2591}
2592
2593
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002594PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002595"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002596\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002597Return True if S starts with the specified prefix, False otherwise.\n\
2598With optional start, test S beginning at that position.\n\
2599With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002600
2601static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002602string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002603{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002605 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002607 Py_ssize_t plen;
2608 Py_ssize_t start = 0;
2609 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002610 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002611
Guido van Rossumc6821402000-05-08 14:08:05 +00002612 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2613 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614 return NULL;
2615 if (PyString_Check(subobj)) {
2616 prefix = PyString_AS_STRING(subobj);
2617 plen = PyString_GET_SIZE(subobj);
2618 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002619#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002620 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002621 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002622 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002623 subobj, start, end, -1);
2624 if (rc == -1)
2625 return NULL;
2626 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002627 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002628 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002629#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002630 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002631 return NULL;
2632
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002633 string_adjust_indices(&start, &end, len);
2634
2635 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002636 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002637
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002638 if (end-start >= plen)
2639 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2640 else
2641 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002642}
2643
2644
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002645PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002646"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002647\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002648Return True if S ends with the specified suffix, False otherwise.\n\
2649With optional start, test S beginning at that position.\n\
2650With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651
2652static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002653string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002654{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002656 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002657 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002658 Py_ssize_t slen;
2659 Py_ssize_t start = 0;
2660 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002661 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002662
Guido van Rossumc6821402000-05-08 14:08:05 +00002663 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2664 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002665 return NULL;
2666 if (PyString_Check(subobj)) {
2667 suffix = PyString_AS_STRING(subobj);
2668 slen = PyString_GET_SIZE(subobj);
2669 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002670#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002671 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002672 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002673 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002674 subobj, start, end, +1);
2675 if (rc == -1)
2676 return NULL;
2677 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002678 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002679 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002680#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002681 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002682 return NULL;
2683
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002684 string_adjust_indices(&start, &end, len);
2685
2686 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002687 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002688
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002689 if (end-slen > start)
2690 start = end - slen;
2691 if (end-start >= slen)
2692 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2693 else
2694 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002695}
2696
2697
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002698PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002699"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002700\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002701Encodes S using the codec registered for encoding. encoding defaults\n\
2702to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002703handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002704a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2705'xmlcharrefreplace' as well as any other name registered with\n\
2706codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002707
2708static PyObject *
2709string_encode(PyStringObject *self, PyObject *args)
2710{
2711 char *encoding = NULL;
2712 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002713 PyObject *v;
2714
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002715 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2716 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002717 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002718 if (v == NULL)
2719 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002720 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2721 PyErr_Format(PyExc_TypeError,
2722 "encoder did not return a string/unicode object "
2723 "(type=%.400s)",
2724 v->ob_type->tp_name);
2725 Py_DECREF(v);
2726 return NULL;
2727 }
2728 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002729
2730 onError:
2731 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002732}
2733
2734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002735PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002736"S.decode([encoding[,errors]]) -> object\n\
2737\n\
2738Decodes S using the codec registered for encoding. encoding defaults\n\
2739to the default encoding. errors may be given to set a different error\n\
2740handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002741a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2742as well as any other name registerd with codecs.register_error that is\n\
2743able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002744
2745static PyObject *
2746string_decode(PyStringObject *self, PyObject *args)
2747{
2748 char *encoding = NULL;
2749 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002750 PyObject *v;
2751
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002752 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2753 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002754 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002755 if (v == NULL)
2756 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002757 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2758 PyErr_Format(PyExc_TypeError,
2759 "decoder did not return a string/unicode object "
2760 "(type=%.400s)",
2761 v->ob_type->tp_name);
2762 Py_DECREF(v);
2763 return NULL;
2764 }
2765 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002766
2767 onError:
2768 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002769}
2770
2771
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002772PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002773"S.expandtabs([tabsize]) -> string\n\
2774\n\
2775Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002776If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002777
2778static PyObject*
2779string_expandtabs(PyStringObject *self, PyObject *args)
2780{
2781 const char *e, *p;
2782 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002783 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002784 PyObject *u;
2785 int tabsize = 8;
2786
2787 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2788 return NULL;
2789
Thomas Wouters7e474022000-07-16 12:04:32 +00002790 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002791 i = j = 0;
2792 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2793 for (p = PyString_AS_STRING(self); p < e; p++)
2794 if (*p == '\t') {
2795 if (tabsize > 0)
2796 j += tabsize - (j % tabsize);
2797 }
2798 else {
2799 j++;
2800 if (*p == '\n' || *p == '\r') {
2801 i += j;
2802 j = 0;
2803 }
2804 }
2805
2806 /* Second pass: create output string and fill it */
2807 u = PyString_FromStringAndSize(NULL, i + j);
2808 if (!u)
2809 return NULL;
2810
2811 j = 0;
2812 q = PyString_AS_STRING(u);
2813
2814 for (p = PyString_AS_STRING(self); p < e; p++)
2815 if (*p == '\t') {
2816 if (tabsize > 0) {
2817 i = tabsize - (j % tabsize);
2818 j += i;
2819 while (i--)
2820 *q++ = ' ';
2821 }
2822 }
2823 else {
2824 j++;
2825 *q++ = *p;
2826 if (*p == '\n' || *p == '\r')
2827 j = 0;
2828 }
2829
2830 return u;
2831}
2832
Tim Peters8fa5dd02001-09-12 02:18:30 +00002833static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002834pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002835{
2836 PyObject *u;
2837
2838 if (left < 0)
2839 left = 0;
2840 if (right < 0)
2841 right = 0;
2842
Tim Peters8fa5dd02001-09-12 02:18:30 +00002843 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002844 Py_INCREF(self);
2845 return (PyObject *)self;
2846 }
2847
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002848 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002849 left + PyString_GET_SIZE(self) + right);
2850 if (u) {
2851 if (left)
2852 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002853 memcpy(PyString_AS_STRING(u) + left,
2854 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002855 PyString_GET_SIZE(self));
2856 if (right)
2857 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2858 fill, right);
2859 }
2860
2861 return u;
2862}
2863
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002864PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002865"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002866"\n"
2867"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002868"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869
2870static PyObject *
2871string_ljust(PyStringObject *self, PyObject *args)
2872{
2873 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002874 char fillchar = ' ';
2875
2876 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877 return NULL;
2878
Tim Peters8fa5dd02001-09-12 02:18:30 +00002879 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002880 Py_INCREF(self);
2881 return (PyObject*) self;
2882 }
2883
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002884 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885}
2886
2887
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002888PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002889"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002890"\n"
2891"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002892"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893
2894static PyObject *
2895string_rjust(PyStringObject *self, PyObject *args)
2896{
2897 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002898 char fillchar = ' ';
2899
2900 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002901 return NULL;
2902
Tim Peters8fa5dd02001-09-12 02:18:30 +00002903 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002904 Py_INCREF(self);
2905 return (PyObject*) self;
2906 }
2907
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002908 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002909}
2910
2911
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002912PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002913"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002914"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002915"Return S centered in a string of length width. Padding is\n"
2916"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002917
2918static PyObject *
2919string_center(PyStringObject *self, PyObject *args)
2920{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002921 Py_ssize_t marg, left;
2922 long width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002923 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002924
Martin v. Löwis18e16552006-02-15 17:27:45 +00002925 if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002926 return NULL;
2927
Tim Peters8fa5dd02001-09-12 02:18:30 +00002928 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929 Py_INCREF(self);
2930 return (PyObject*) self;
2931 }
2932
2933 marg = width - PyString_GET_SIZE(self);
2934 left = marg / 2 + (marg & width & 1);
2935
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002936 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002937}
2938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002939PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002940"S.zfill(width) -> string\n"
2941"\n"
2942"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002943"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002944
2945static PyObject *
2946string_zfill(PyStringObject *self, PyObject *args)
2947{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002948 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00002949 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002950 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002951
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002952 long width;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002953 if (!PyArg_ParseTuple(args, "l:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00002954 return NULL;
2955
2956 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002957 if (PyString_CheckExact(self)) {
2958 Py_INCREF(self);
2959 return (PyObject*) self;
2960 }
2961 else
2962 return PyString_FromStringAndSize(
2963 PyString_AS_STRING(self),
2964 PyString_GET_SIZE(self)
2965 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002966 }
2967
2968 fill = width - PyString_GET_SIZE(self);
2969
2970 s = pad(self, fill, 0, '0');
2971
2972 if (s == NULL)
2973 return NULL;
2974
2975 p = PyString_AS_STRING(s);
2976 if (p[fill] == '+' || p[fill] == '-') {
2977 /* move sign to beginning of string */
2978 p[0] = p[fill];
2979 p[fill] = '0';
2980 }
2981
2982 return (PyObject*) s;
2983}
2984
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002985PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002986"S.isspace() -> bool\n\
2987\n\
2988Return True if all characters in S are whitespace\n\
2989and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002990
2991static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002992string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002993{
Fred Drakeba096332000-07-09 07:04:36 +00002994 register const unsigned char *p
2995 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002996 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002997
Guido van Rossum4c08d552000-03-10 22:55:18 +00002998 /* Shortcut for single character strings */
2999 if (PyString_GET_SIZE(self) == 1 &&
3000 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003001 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003002
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003003 /* Special case for empty strings */
3004 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003005 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003006
Guido van Rossum4c08d552000-03-10 22:55:18 +00003007 e = p + PyString_GET_SIZE(self);
3008 for (; p < e; p++) {
3009 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003010 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003011 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003012 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003013}
3014
3015
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003016PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003017"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003018\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003019Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003020and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003021
3022static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003023string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003024{
Fred Drakeba096332000-07-09 07:04:36 +00003025 register const unsigned char *p
3026 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003027 register const unsigned char *e;
3028
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003029 /* Shortcut for single character strings */
3030 if (PyString_GET_SIZE(self) == 1 &&
3031 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003032 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003033
3034 /* Special case for empty strings */
3035 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003036 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003037
3038 e = p + PyString_GET_SIZE(self);
3039 for (; p < e; p++) {
3040 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003041 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003042 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003043 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003044}
3045
3046
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003047PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003048"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003049\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003050Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003051and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003052
3053static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003054string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003055{
Fred Drakeba096332000-07-09 07:04:36 +00003056 register const unsigned char *p
3057 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003058 register const unsigned char *e;
3059
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003060 /* Shortcut for single character strings */
3061 if (PyString_GET_SIZE(self) == 1 &&
3062 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003063 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003064
3065 /* Special case for empty strings */
3066 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003067 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003068
3069 e = p + PyString_GET_SIZE(self);
3070 for (; p < e; p++) {
3071 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003072 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003073 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003074 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003075}
3076
3077
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003078PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003079"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003081Return True if all characters in S are digits\n\
3082and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003083
3084static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003085string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086{
Fred Drakeba096332000-07-09 07:04:36 +00003087 register const unsigned char *p
3088 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003089 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090
Guido van Rossum4c08d552000-03-10 22:55:18 +00003091 /* Shortcut for single character strings */
3092 if (PyString_GET_SIZE(self) == 1 &&
3093 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003094 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003095
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003096 /* Special case for empty strings */
3097 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003098 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003099
Guido van Rossum4c08d552000-03-10 22:55:18 +00003100 e = p + PyString_GET_SIZE(self);
3101 for (; p < e; p++) {
3102 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003103 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003104 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003105 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003106}
3107
3108
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003109PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003110"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003112Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003113at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114
3115static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003116string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003117{
Fred Drakeba096332000-07-09 07:04:36 +00003118 register const unsigned char *p
3119 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003120 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003121 int cased;
3122
Guido van Rossum4c08d552000-03-10 22:55:18 +00003123 /* Shortcut for single character strings */
3124 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003125 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003126
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003127 /* Special case for empty strings */
3128 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003129 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003130
Guido van Rossum4c08d552000-03-10 22:55:18 +00003131 e = p + PyString_GET_SIZE(self);
3132 cased = 0;
3133 for (; p < e; p++) {
3134 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003135 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003136 else if (!cased && islower(*p))
3137 cased = 1;
3138 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003139 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140}
3141
3142
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003143PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003144"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003146Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003147at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003148
3149static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003150string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003151{
Fred Drakeba096332000-07-09 07:04:36 +00003152 register const unsigned char *p
3153 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003154 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 int cased;
3156
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157 /* Shortcut for single character strings */
3158 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003159 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003161 /* Special case for empty strings */
3162 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003163 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003164
Guido van Rossum4c08d552000-03-10 22:55:18 +00003165 e = p + PyString_GET_SIZE(self);
3166 cased = 0;
3167 for (; p < e; p++) {
3168 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003169 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003170 else if (!cased && isupper(*p))
3171 cased = 1;
3172 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003173 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003174}
3175
3176
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003177PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003178"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003179\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003180Return True if S is a titlecased string and there is at least one\n\
3181character in S, i.e. uppercase characters may only follow uncased\n\
3182characters and lowercase characters only cased ones. Return False\n\
3183otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003184
3185static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003186string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003187{
Fred Drakeba096332000-07-09 07:04:36 +00003188 register const unsigned char *p
3189 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003190 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003191 int cased, previous_is_cased;
3192
Guido van Rossum4c08d552000-03-10 22:55:18 +00003193 /* Shortcut for single character strings */
3194 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003195 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003197 /* Special case for empty strings */
3198 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003199 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003200
Guido van Rossum4c08d552000-03-10 22:55:18 +00003201 e = p + PyString_GET_SIZE(self);
3202 cased = 0;
3203 previous_is_cased = 0;
3204 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003205 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003206
3207 if (isupper(ch)) {
3208 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003209 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003210 previous_is_cased = 1;
3211 cased = 1;
3212 }
3213 else if (islower(ch)) {
3214 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003215 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003216 previous_is_cased = 1;
3217 cased = 1;
3218 }
3219 else
3220 previous_is_cased = 0;
3221 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003222 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003223}
3224
3225
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003226PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003227"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003228\n\
3229Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003230Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003231is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003232
Guido van Rossum4c08d552000-03-10 22:55:18 +00003233static PyObject*
3234string_splitlines(PyStringObject *self, PyObject *args)
3235{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003236 register Py_ssize_t i;
3237 register Py_ssize_t j;
3238 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003239 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003240 PyObject *list;
3241 PyObject *str;
3242 char *data;
3243
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003244 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003245 return NULL;
3246
3247 data = PyString_AS_STRING(self);
3248 len = PyString_GET_SIZE(self);
3249
Guido van Rossum4c08d552000-03-10 22:55:18 +00003250 list = PyList_New(0);
3251 if (!list)
3252 goto onError;
3253
3254 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003255 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003256
Guido van Rossum4c08d552000-03-10 22:55:18 +00003257 /* Find a line and append it */
3258 while (i < len && data[i] != '\n' && data[i] != '\r')
3259 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003260
3261 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003262 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003263 if (i < len) {
3264 if (data[i] == '\r' && i + 1 < len &&
3265 data[i+1] == '\n')
3266 i += 2;
3267 else
3268 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003269 if (keepends)
3270 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003271 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003272 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003273 j = i;
3274 }
3275 if (j < len) {
3276 SPLIT_APPEND(data, j, len);
3277 }
3278
3279 return list;
3280
3281 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003282 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003283 return NULL;
3284}
3285
3286#undef SPLIT_APPEND
3287
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003288static PyObject *
3289string_getnewargs(PyStringObject *v)
3290{
3291 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3292}
3293
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003294
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003295static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003296string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003297 /* Counterparts of the obsolete stropmodule functions; except
3298 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003299 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3300 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003301 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003302 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3303 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003304 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3305 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3306 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3307 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3308 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3309 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3310 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003311 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3312 capitalize__doc__},
3313 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3314 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3315 endswith__doc__},
3316 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3317 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3318 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3319 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3320 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3321 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3322 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3323 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3324 startswith__doc__},
3325 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3326 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3327 swapcase__doc__},
3328 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3329 translate__doc__},
3330 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3331 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3332 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3333 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3334 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3335 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3336 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3337 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3338 expandtabs__doc__},
3339 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3340 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003341 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003342 {NULL, NULL} /* sentinel */
3343};
3344
Jeremy Hylton938ace62002-07-17 16:30:39 +00003345static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003346str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3347
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003348static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003349string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003350{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003351 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003352 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003353
Guido van Rossumae960af2001-08-30 03:11:59 +00003354 if (type != &PyString_Type)
3355 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003356 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3357 return NULL;
3358 if (x == NULL)
3359 return PyString_FromString("");
3360 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003361}
3362
Guido van Rossumae960af2001-08-30 03:11:59 +00003363static PyObject *
3364str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3365{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003366 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003367 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003368
3369 assert(PyType_IsSubtype(type, &PyString_Type));
3370 tmp = string_new(&PyString_Type, args, kwds);
3371 if (tmp == NULL)
3372 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003373 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003374 n = PyString_GET_SIZE(tmp);
3375 pnew = type->tp_alloc(type, n);
3376 if (pnew != NULL) {
3377 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003378 ((PyStringObject *)pnew)->ob_shash =
3379 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003380 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003381 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003382 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003383 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003384}
3385
Guido van Rossumcacfc072002-05-24 19:01:59 +00003386static PyObject *
3387basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3388{
3389 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003390 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003391 return NULL;
3392}
3393
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003394static PyObject *
3395string_mod(PyObject *v, PyObject *w)
3396{
3397 if (!PyString_Check(v)) {
3398 Py_INCREF(Py_NotImplemented);
3399 return Py_NotImplemented;
3400 }
3401 return PyString_Format(v, w);
3402}
3403
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003404PyDoc_STRVAR(basestring_doc,
3405"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003406
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003407static PyNumberMethods string_as_number = {
3408 0, /*nb_add*/
3409 0, /*nb_subtract*/
3410 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003411 string_mod, /*nb_remainder*/
3412};
3413
3414
Guido van Rossumcacfc072002-05-24 19:01:59 +00003415PyTypeObject PyBaseString_Type = {
3416 PyObject_HEAD_INIT(&PyType_Type)
3417 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003418 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003419 0,
3420 0,
3421 0, /* tp_dealloc */
3422 0, /* tp_print */
3423 0, /* tp_getattr */
3424 0, /* tp_setattr */
3425 0, /* tp_compare */
3426 0, /* tp_repr */
3427 0, /* tp_as_number */
3428 0, /* tp_as_sequence */
3429 0, /* tp_as_mapping */
3430 0, /* tp_hash */
3431 0, /* tp_call */
3432 0, /* tp_str */
3433 0, /* tp_getattro */
3434 0, /* tp_setattro */
3435 0, /* tp_as_buffer */
3436 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3437 basestring_doc, /* tp_doc */
3438 0, /* tp_traverse */
3439 0, /* tp_clear */
3440 0, /* tp_richcompare */
3441 0, /* tp_weaklistoffset */
3442 0, /* tp_iter */
3443 0, /* tp_iternext */
3444 0, /* tp_methods */
3445 0, /* tp_members */
3446 0, /* tp_getset */
3447 &PyBaseObject_Type, /* tp_base */
3448 0, /* tp_dict */
3449 0, /* tp_descr_get */
3450 0, /* tp_descr_set */
3451 0, /* tp_dictoffset */
3452 0, /* tp_init */
3453 0, /* tp_alloc */
3454 basestring_new, /* tp_new */
3455 0, /* tp_free */
3456};
3457
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003458PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003459"str(object) -> string\n\
3460\n\
3461Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003462If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003463
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003464PyTypeObject PyString_Type = {
3465 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003466 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003467 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003468 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003469 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003470 (destructor)string_dealloc, /* tp_dealloc */
3471 (printfunc)string_print, /* tp_print */
3472 0, /* tp_getattr */
3473 0, /* tp_setattr */
3474 0, /* tp_compare */
3475 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003476 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003477 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003478 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003479 (hashfunc)string_hash, /* tp_hash */
3480 0, /* tp_call */
3481 (reprfunc)string_str, /* tp_str */
3482 PyObject_GenericGetAttr, /* tp_getattro */
3483 0, /* tp_setattro */
3484 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003485 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3486 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003487 string_doc, /* tp_doc */
3488 0, /* tp_traverse */
3489 0, /* tp_clear */
3490 (richcmpfunc)string_richcompare, /* tp_richcompare */
3491 0, /* tp_weaklistoffset */
3492 0, /* tp_iter */
3493 0, /* tp_iternext */
3494 string_methods, /* tp_methods */
3495 0, /* tp_members */
3496 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003497 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003498 0, /* tp_dict */
3499 0, /* tp_descr_get */
3500 0, /* tp_descr_set */
3501 0, /* tp_dictoffset */
3502 0, /* tp_init */
3503 0, /* tp_alloc */
3504 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003505 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003506};
3507
3508void
Fred Drakeba096332000-07-09 07:04:36 +00003509PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003510{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003511 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003512 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003513 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003514 if (w == NULL || !PyString_Check(*pv)) {
3515 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003516 *pv = NULL;
3517 return;
3518 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003519 v = string_concat((PyStringObject *) *pv, w);
3520 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003521 *pv = v;
3522}
3523
Guido van Rossum013142a1994-08-30 08:19:36 +00003524void
Fred Drakeba096332000-07-09 07:04:36 +00003525PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003526{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003527 PyString_Concat(pv, w);
3528 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003529}
3530
3531
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003532/* The following function breaks the notion that strings are immutable:
3533 it changes the size of a string. We get away with this only if there
3534 is only one module referencing the object. You can also think of it
3535 as creating a new string object and destroying the old one, only
3536 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003537 already be known to some other part of the code...
3538 Note that if there's not enough memory to resize the string, the original
3539 string object at *pv is deallocated, *pv is set to NULL, an "out of
3540 memory" exception is set, and -1 is returned. Else (on success) 0 is
3541 returned, and the value in *pv may or may not be the same as on input.
3542 As always, an extra byte is allocated for a trailing \0 byte (newsize
3543 does *not* include that), and a trailing \0 byte is stored.
3544*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003545
3546int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003547_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003548{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003549 register PyObject *v;
3550 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003551 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003552 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3553 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003554 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003555 Py_DECREF(v);
3556 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003557 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003558 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003559 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003560 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003561 _Py_ForgetReference(v);
3562 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003563 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003564 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003565 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003566 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003567 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003568 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003569 _Py_NewReference(*pv);
3570 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003571 sv->ob_size = newsize;
3572 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003573 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003574 return 0;
3575}
Guido van Rossume5372401993-03-16 12:15:04 +00003576
3577/* Helpers for formatstring */
3578
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003579static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00003580getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003581{
Thomas Wouters977485d2006-02-16 15:59:12 +00003582 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003583 if (argidx < arglen) {
3584 (*p_argidx)++;
3585 if (arglen < 0)
3586 return args;
3587 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003588 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003589 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003590 PyErr_SetString(PyExc_TypeError,
3591 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003592 return NULL;
3593}
3594
/* Bit masks for the flag characters of a conversion specifier; each one
 * is OR-ed into `flags' when its character is seen while parsing:
 *	F_LJUST	'-'
 *	F_SIGN	'+'
 *	F_BLANK	' '
 *	F_ALT	'#'
 *	F_ZERO	'0'
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
3607
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003608static int
Fred Drakeba096332000-07-09 07:04:36 +00003609formatfloat(char *buf, size_t buflen, int flags,
3610 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003611{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003612 /* fmt = '%#.' + `prec` + `type`
3613 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003614 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003615 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003616 x = PyFloat_AsDouble(v);
3617 if (x == -1.0 && PyErr_Occurred()) {
3618 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003619 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003620 }
Guido van Rossume5372401993-03-16 12:15:04 +00003621 if (prec < 0)
3622 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003623 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3624 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003625 /* Worst case length calc to ensure no buffer overrun:
3626
3627 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003628 fmt = %#.<prec>g
3629 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003630 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003631 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003632
3633 'f' formats:
3634 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3635 len = 1 + 50 + 1 + prec = 52 + prec
3636
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003637 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003638 always given), therefore increase the length by one.
3639
3640 */
3641 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3642 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003643 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003644 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003645 return -1;
3646 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003647 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3648 (flags&F_ALT) ? "#" : "",
3649 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003650 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003651 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003652}
3653
Tim Peters38fd5b62000-09-21 05:43:11 +00003654/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3655 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3656 * Python's regular ints.
3657 * Return value: a new PyString*, or NULL if error.
3658 * . *pbuf is set to point into it,
3659 * *plen set to the # of chars following that.
3660 * Caller must decref it when done using pbuf.
3661 * The string starting at *pbuf is of the form
3662 * "-"? ("0x" | "0X")? digit+
3663 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003664 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003665 * There will be at least prec digits, zero-filled on the left if
3666 * necessary to get that many.
3667 * val object to be converted
3668 * flags bitmask of format flags; only F_ALT is looked at
3669 * prec minimum number of digits; 0-fill on left if needed
3670 * type a character in [duoxX]; u acts the same as d
3671 *
3672 * CAUTION: o, x and X conversions on regular ints can never
3673 * produce a '-' sign, but can for Python's unbounded ints.
3674 */
3675PyObject*
3676_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3677 char **pbuf, int *plen)
3678{
3679 PyObject *result = NULL;
3680 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003681 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003682 int sign; /* 1 if '-', else 0 */
3683 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003684 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003685 int numdigits; /* len == numnondigits + numdigits */
3686 int numnondigits = 0;
3687
3688 switch (type) {
3689 case 'd':
3690 case 'u':
3691 result = val->ob_type->tp_str(val);
3692 break;
3693 case 'o':
3694 result = val->ob_type->tp_as_number->nb_oct(val);
3695 break;
3696 case 'x':
3697 case 'X':
3698 numnondigits = 2;
3699 result = val->ob_type->tp_as_number->nb_hex(val);
3700 break;
3701 default:
3702 assert(!"'type' not in [duoxX]");
3703 }
3704 if (!result)
3705 return NULL;
3706
3707 /* To modify the string in-place, there can only be one reference. */
3708 if (result->ob_refcnt != 1) {
3709 PyErr_BadInternalCall();
3710 return NULL;
3711 }
3712 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00003713 llen = PyString_Size(result);
3714 if (llen > INT_MAX) {
3715 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
3716 return NULL;
3717 }
3718 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003719 if (buf[len-1] == 'L') {
3720 --len;
3721 buf[len] = '\0';
3722 }
3723 sign = buf[0] == '-';
3724 numnondigits += sign;
3725 numdigits = len - numnondigits;
3726 assert(numdigits > 0);
3727
Tim Petersfff53252001-04-12 18:38:48 +00003728 /* Get rid of base marker unless F_ALT */
3729 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003730 /* Need to skip 0x, 0X or 0. */
3731 int skipped = 0;
3732 switch (type) {
3733 case 'o':
3734 assert(buf[sign] == '0');
3735 /* If 0 is only digit, leave it alone. */
3736 if (numdigits > 1) {
3737 skipped = 1;
3738 --numdigits;
3739 }
3740 break;
3741 case 'x':
3742 case 'X':
3743 assert(buf[sign] == '0');
3744 assert(buf[sign + 1] == 'x');
3745 skipped = 2;
3746 numnondigits -= 2;
3747 break;
3748 }
3749 if (skipped) {
3750 buf += skipped;
3751 len -= skipped;
3752 if (sign)
3753 buf[0] = '-';
3754 }
3755 assert(len == numnondigits + numdigits);
3756 assert(numdigits > 0);
3757 }
3758
3759 /* Fill with leading zeroes to meet minimum width. */
3760 if (prec > numdigits) {
3761 PyObject *r1 = PyString_FromStringAndSize(NULL,
3762 numnondigits + prec);
3763 char *b1;
3764 if (!r1) {
3765 Py_DECREF(result);
3766 return NULL;
3767 }
3768 b1 = PyString_AS_STRING(r1);
3769 for (i = 0; i < numnondigits; ++i)
3770 *b1++ = *buf++;
3771 for (i = 0; i < prec - numdigits; i++)
3772 *b1++ = '0';
3773 for (i = 0; i < numdigits; i++)
3774 *b1++ = *buf++;
3775 *b1 = '\0';
3776 Py_DECREF(result);
3777 result = r1;
3778 buf = PyString_AS_STRING(result);
3779 len = numnondigits + prec;
3780 }
3781
3782 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003783 if (type == 'X') {
3784 /* Need to convert all lower case letters to upper case.
3785 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003786 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003787 if (buf[i] >= 'a' && buf[i] <= 'x')
3788 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003789 }
3790 *pbuf = buf;
3791 *plen = len;
3792 return result;
3793}
3794
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003795static int
Fred Drakeba096332000-07-09 07:04:36 +00003796formatint(char *buf, size_t buflen, int flags,
3797 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003798{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003799 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003800 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3801 + 1 + 1 = 24 */
3802 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003803 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003804 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003805
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003806 x = PyInt_AsLong(v);
3807 if (x == -1 && PyErr_Occurred()) {
3808 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003809 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003810 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003811 if (x < 0 && type == 'u') {
3812 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003813 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003814 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3815 sign = "-";
3816 else
3817 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003818 if (prec < 0)
3819 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003820
3821 if ((flags & F_ALT) &&
3822 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003823 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003824 * of issues that cause pain:
3825 * - when 0 is being converted, the C standard leaves off
3826 * the '0x' or '0X', which is inconsistent with other
3827 * %#x/%#X conversions and inconsistent with Python's
3828 * hex() function
3829 * - there are platforms that violate the standard and
3830 * convert 0 with the '0x' or '0X'
3831 * (Metrowerks, Compaq Tru64)
3832 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003833 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003834 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003835 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003836 * We can achieve the desired consistency by inserting our
3837 * own '0x' or '0X' prefix, and substituting %x/%X in place
3838 * of %#x/%#X.
3839 *
3840 * Note that this is the same approach as used in
3841 * formatint() in unicodeobject.c
3842 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003843 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3844 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003845 }
3846 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003847 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3848 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003849 prec, type);
3850 }
3851
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003852 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3853 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003854 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003855 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003856 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003857 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003858 return -1;
3859 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003860 if (sign[0])
3861 PyOS_snprintf(buf, buflen, fmt, -x);
3862 else
3863 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003864 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003865}
3866
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003867static int
Fred Drakeba096332000-07-09 07:04:36 +00003868formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003869{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003870 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003871 if (PyString_Check(v)) {
3872 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003873 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003874 }
3875 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003876 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003877 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003878 }
3879 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003880 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003881}
3882
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003892
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003893PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003894PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003895{
3896 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003897 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003898 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003899 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003900 PyObject *result, *orig_args;
3901#ifdef Py_USING_UNICODE
3902 PyObject *v, *w;
3903#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003904 PyObject *dict = NULL;
3905 if (format == NULL || !PyString_Check(format) || args == NULL) {
3906 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003907 return NULL;
3908 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003909 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003910 fmt = PyString_AS_STRING(format);
3911 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003912 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003913 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003914 if (result == NULL)
3915 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003916 res = PyString_AsString(result);
3917 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003918 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003919 argidx = 0;
3920 }
3921 else {
3922 arglen = -1;
3923 argidx = -2;
3924 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003925 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3926 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003927 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003928 while (--fmtcnt >= 0) {
3929 if (*fmt != '%') {
3930 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003931 rescnt = fmtcnt + 100;
3932 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003933 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003934 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003935 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003936 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003937 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003938 }
3939 *res++ = *fmt++;
3940 }
3941 else {
3942 /* Got a format specifier */
3943 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003944 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003945 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003946 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003947 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003948 PyObject *v = NULL;
3949 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003950 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003951 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003952 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003953 char formatbuf[FORMATBUFLEN];
3954 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003955#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003956 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003957 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003958#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003959
Guido van Rossumda9c2711996-12-05 21:58:58 +00003960 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003961 if (*fmt == '(') {
3962 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003963 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003964 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003965 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003966
3967 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003968 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003969 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003970 goto error;
3971 }
3972 ++fmt;
3973 --fmtcnt;
3974 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003975 /* Skip over balanced parentheses */
3976 while (pcount > 0 && --fmtcnt >= 0) {
3977 if (*fmt == ')')
3978 --pcount;
3979 else if (*fmt == '(')
3980 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003981 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003982 }
3983 keylen = fmt - keystart - 1;
3984 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003985 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003986 "incomplete format key");
3987 goto error;
3988 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003989 key = PyString_FromStringAndSize(keystart,
3990 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003991 if (key == NULL)
3992 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003993 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003994 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003995 args_owned = 0;
3996 }
3997 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003998 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003999 if (args == NULL) {
4000 goto error;
4001 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004002 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004003 arglen = -1;
4004 argidx = -2;
4005 }
Guido van Rossume5372401993-03-16 12:15:04 +00004006 while (--fmtcnt >= 0) {
4007 switch (c = *fmt++) {
4008 case '-': flags |= F_LJUST; continue;
4009 case '+': flags |= F_SIGN; continue;
4010 case ' ': flags |= F_BLANK; continue;
4011 case '#': flags |= F_ALT; continue;
4012 case '0': flags |= F_ZERO; continue;
4013 }
4014 break;
4015 }
4016 if (c == '*') {
4017 v = getnextarg(args, arglen, &argidx);
4018 if (v == NULL)
4019 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004020 if (!PyInt_Check(v)) {
4021 PyErr_SetString(PyExc_TypeError,
4022 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004023 goto error;
4024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004025 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004026 if (width < 0) {
4027 flags |= F_LJUST;
4028 width = -width;
4029 }
Guido van Rossume5372401993-03-16 12:15:04 +00004030 if (--fmtcnt >= 0)
4031 c = *fmt++;
4032 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004033 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004034 width = c - '0';
4035 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004036 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004037 if (!isdigit(c))
4038 break;
4039 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004040 PyErr_SetString(
4041 PyExc_ValueError,
4042 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004043 goto error;
4044 }
4045 width = width*10 + (c - '0');
4046 }
4047 }
4048 if (c == '.') {
4049 prec = 0;
4050 if (--fmtcnt >= 0)
4051 c = *fmt++;
4052 if (c == '*') {
4053 v = getnextarg(args, arglen, &argidx);
4054 if (v == NULL)
4055 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004056 if (!PyInt_Check(v)) {
4057 PyErr_SetString(
4058 PyExc_TypeError,
4059 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004060 goto error;
4061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004062 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004063 if (prec < 0)
4064 prec = 0;
4065 if (--fmtcnt >= 0)
4066 c = *fmt++;
4067 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004068 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004069 prec = c - '0';
4070 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004071 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004072 if (!isdigit(c))
4073 break;
4074 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004075 PyErr_SetString(
4076 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004077 "prec too big");
4078 goto error;
4079 }
4080 prec = prec*10 + (c - '0');
4081 }
4082 }
4083 } /* prec */
4084 if (fmtcnt >= 0) {
4085 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004086 if (--fmtcnt >= 0)
4087 c = *fmt++;
4088 }
4089 }
4090 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004091 PyErr_SetString(PyExc_ValueError,
4092 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004093 goto error;
4094 }
4095 if (c != '%') {
4096 v = getnextarg(args, arglen, &argidx);
4097 if (v == NULL)
4098 goto error;
4099 }
4100 sign = 0;
4101 fill = ' ';
4102 switch (c) {
4103 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004104 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004105 len = 1;
4106 break;
4107 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004108#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004109 if (PyUnicode_Check(v)) {
4110 fmt = fmt_start;
4111 argidx = argidx_start;
4112 goto unicode;
4113 }
Georg Brandld45014b2005-10-01 17:06:00 +00004114#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004115 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004116#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004117 if (temp != NULL && PyUnicode_Check(temp)) {
4118 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004119 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004120 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004121 goto unicode;
4122 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004123#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004124 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004125 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004126 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004127 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004128 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004129 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004130 if (!PyString_Check(temp)) {
4131 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004132 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004133 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004134 goto error;
4135 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004136 pbuf = PyString_AS_STRING(temp);
4137 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004138 if (prec >= 0 && len > prec)
4139 len = prec;
4140 break;
4141 case 'i':
4142 case 'd':
4143 case 'u':
4144 case 'o':
4145 case 'x':
4146 case 'X':
4147 if (c == 'i')
4148 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004149 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004150 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004151 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004152 prec, c, &pbuf, &ilen);
4153 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004154 if (!temp)
4155 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004156 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004157 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004158 else {
4159 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004160 len = formatint(pbuf,
4161 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004162 flags, prec, c, v);
4163 if (len < 0)
4164 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004165 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004166 }
4167 if (flags & F_ZERO)
4168 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004169 break;
4170 case 'e':
4171 case 'E':
4172 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004173 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004174 case 'g':
4175 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004176 if (c == 'F')
4177 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004178 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004179 len = formatfloat(pbuf, sizeof(formatbuf),
4180 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004181 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004182 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004183 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004184 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004185 fill = '0';
4186 break;
4187 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004188#ifdef Py_USING_UNICODE
4189 if (PyUnicode_Check(v)) {
4190 fmt = fmt_start;
4191 argidx = argidx_start;
4192 goto unicode;
4193 }
4194#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004195 pbuf = formatbuf;
4196 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004197 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004198 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004199 break;
4200 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004201 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004202 "unsupported format character '%c' (0x%x) "
4203 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004204 c, c,
4205 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004206 goto error;
4207 }
4208 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004209 if (*pbuf == '-' || *pbuf == '+') {
4210 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004211 len--;
4212 }
4213 else if (flags & F_SIGN)
4214 sign = '+';
4215 else if (flags & F_BLANK)
4216 sign = ' ';
4217 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004218 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004219 }
4220 if (width < len)
4221 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004222 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004223 reslen -= rescnt;
4224 rescnt = width + fmtcnt + 100;
4225 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004226 if (reslen < 0) {
4227 Py_DECREF(result);
4228 return PyErr_NoMemory();
4229 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004230 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004231 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004232 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004233 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004234 }
4235 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004236 if (fill != ' ')
4237 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004238 rescnt--;
4239 if (width > len)
4240 width--;
4241 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004242 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4243 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004244 assert(pbuf[1] == c);
4245 if (fill != ' ') {
4246 *res++ = *pbuf++;
4247 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004248 }
Tim Petersfff53252001-04-12 18:38:48 +00004249 rescnt -= 2;
4250 width -= 2;
4251 if (width < 0)
4252 width = 0;
4253 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004254 }
4255 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004256 do {
4257 --rescnt;
4258 *res++ = fill;
4259 } while (--width > len);
4260 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004261 if (fill == ' ') {
4262 if (sign)
4263 *res++ = sign;
4264 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004265 (c == 'x' || c == 'X')) {
4266 assert(pbuf[0] == '0');
4267 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004268 *res++ = *pbuf++;
4269 *res++ = *pbuf++;
4270 }
4271 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004272 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004273 res += len;
4274 rescnt -= len;
4275 while (--width >= len) {
4276 --rescnt;
4277 *res++ = ' ';
4278 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004279 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004280 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004281 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004282 goto error;
4283 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004284 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004285 } /* '%' */
4286 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004287 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004288 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004289 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004290 goto error;
4291 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004292 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004293 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004294 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004295 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004296 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004297
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004298#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004299 unicode:
4300 if (args_owned) {
4301 Py_DECREF(args);
4302 args_owned = 0;
4303 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004304 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004305 if (PyTuple_Check(orig_args) && argidx > 0) {
4306 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004307 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004308 v = PyTuple_New(n);
4309 if (v == NULL)
4310 goto error;
4311 while (--n >= 0) {
4312 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4313 Py_INCREF(w);
4314 PyTuple_SET_ITEM(v, n, w);
4315 }
4316 args = v;
4317 } else {
4318 Py_INCREF(orig_args);
4319 args = orig_args;
4320 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004321 args_owned = 1;
4322 /* Take what we have of the result and let the Unicode formatting
4323 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004324 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004325 if (_PyString_Resize(&result, rescnt))
4326 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004327 fmtcnt = PyString_GET_SIZE(format) - \
4328 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004329 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4330 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004331 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004332 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004333 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004334 if (v == NULL)
4335 goto error;
4336 /* Paste what we have (result) to what the Unicode formatting
4337 function returned (v) and return the result (or error) */
4338 w = PyUnicode_Concat(result, v);
4339 Py_DECREF(result);
4340 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004341 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004342 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004343#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004344
Guido van Rossume5372401993-03-16 12:15:04 +00004345 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004346 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004347 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004348 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004349 }
Guido van Rossume5372401993-03-16 12:15:04 +00004350 return NULL;
4351}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004352
Guido van Rossum2a61e741997-01-18 07:55:05 +00004353void
Fred Drakeba096332000-07-09 07:04:36 +00004354PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004355{
4356 register PyStringObject *s = (PyStringObject *)(*p);
4357 PyObject *t;
4358 if (s == NULL || !PyString_Check(s))
4359 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004360 /* If it's a string subclass, we don't really know what putting
4361 it in the interned dict might do. */
4362 if (!PyString_CheckExact(s))
4363 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004364 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004365 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004366 if (interned == NULL) {
4367 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004368 if (interned == NULL) {
4369 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004370 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004371 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004372 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004373 t = PyDict_GetItem(interned, (PyObject *)s);
4374 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004375 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004376 Py_DECREF(*p);
4377 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004378 return;
4379 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004380
Armin Rigo79f7ad22004-08-07 19:27:39 +00004381 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004382 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004383 return;
4384 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004385 /* The two references in interned are not counted by refcnt.
4386 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004387 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004388 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004389}
4390
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004391void
4392PyString_InternImmortal(PyObject **p)
4393{
4394 PyString_InternInPlace(p);
4395 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4396 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4397 Py_INCREF(*p);
4398 }
4399}
4400
Guido van Rossum2a61e741997-01-18 07:55:05 +00004401
4402PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004403PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004404{
4405 PyObject *s = PyString_FromString(cp);
4406 if (s == NULL)
4407 return NULL;
4408 PyString_InternInPlace(&s);
4409 return s;
4410}
4411
Guido van Rossum8cf04761997-08-02 02:57:45 +00004412void
Fred Drakeba096332000-07-09 07:04:36 +00004413PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004414{
4415 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004416 for (i = 0; i < UCHAR_MAX + 1; i++) {
4417 Py_XDECREF(characters[i]);
4418 characters[i] = NULL;
4419 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004420 Py_XDECREF(nullstring);
4421 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004422}
Barry Warsawa903ad982001-02-23 16:40:48 +00004423
Barry Warsawa903ad982001-02-23 16:40:48 +00004424void _Py_ReleaseInternedStrings(void)
4425{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004426 PyObject *keys;
4427 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004428 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004429
4430 if (interned == NULL || !PyDict_Check(interned))
4431 return;
4432 keys = PyDict_Keys(interned);
4433 if (keys == NULL || !PyList_Check(keys)) {
4434 PyErr_Clear();
4435 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004436 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004437
4438 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4439 detector, interned strings are not forcibly deallocated; rather, we
4440 give them their stolen references back, and then clear and DECREF
4441 the interned dict. */
4442
4443 fprintf(stderr, "releasing interned strings\n");
4444 n = PyList_GET_SIZE(keys);
4445 for (i = 0; i < n; i++) {
4446 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4447 switch (s->ob_sstate) {
4448 case SSTATE_NOT_INTERNED:
4449 /* XXX Shouldn't happen */
4450 break;
4451 case SSTATE_INTERNED_IMMORTAL:
4452 s->ob_refcnt += 1;
4453 break;
4454 case SSTATE_INTERNED_MORTAL:
4455 s->ob_refcnt += 2;
4456 break;
4457 default:
4458 Py_FatalError("Inconsistent interned string state.");
4459 }
4460 s->ob_sstate = SSTATE_NOT_INTERNED;
4461 }
4462 Py_DECREF(keys);
4463 PyDict_Clear(interned);
4464 Py_DECREF(interned);
4465 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004466}