blob: 84803083745094e0450e86810a44e4572223df3a [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
22static PyObject *interned;
23
24
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000025/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000028 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000029
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000033 For PyString_FromStringAndSize(), the parameter the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000043
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000044 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
47 is therefore equal to the equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000050*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000157 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000184 /* likewise for %zd */
185 if (*f == 'z' && *(f+1) == 'd')
186 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000187
Barry Warsawdadace02001-08-24 18:32:06 +0000188 switch (*f) {
189 case 'c':
190 (void)va_arg(count, int);
191 /* fall through... */
192 case '%':
193 n++;
194 break;
195 case 'd': case 'i': case 'x':
196 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000197 /* 20 bytes is enough to hold a 64-bit
198 integer. Decimal takes the most space.
199 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000200 n += 20;
201 break;
202 case 's':
203 s = va_arg(count, char*);
204 n += strlen(s);
205 break;
206 case 'p':
207 (void) va_arg(count, int);
208 /* maximum 64-bit pointer representation:
209 * 0xffffffffffffffff
210 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000211 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000212 */
213 n += 19;
214 break;
215 default:
216 /* if we stumble upon an unknown
217 formatting code, copy the rest of
218 the format string to the output
219 string. (we cannot just skip the
220 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000221 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000222 n += strlen(p);
223 goto expand;
224 }
225 } else
226 n++;
227 }
228 expand:
229 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000230 /* Since we've analyzed how much space we need for the worst case,
231 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 string = PyString_FromStringAndSize(NULL, n);
233 if (!string)
234 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000235
Barry Warsawdadace02001-08-24 18:32:06 +0000236 s = PyString_AsString(string);
237
238 for (f = format; *f; f++) {
239 if (*f == '%') {
240 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 Py_ssize_t i;
242 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000243 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 /* handle the size_t flag. */
264 if (*f == 'z' && *(f+1) == 'd') {
265 size_tflag = 1;
266 ++f;
267 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000268
Barry Warsawdadace02001-08-24 18:32:06 +0000269 switch (*f) {
270 case 'c':
271 *s++ = va_arg(vargs, int);
272 break;
273 case 'd':
274 if (longflag)
275 sprintf(s, "%ld", va_arg(vargs, long));
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000276 else if (size_tflag) {
277 /* Instead of checking whether the C
278 library supports %zd, handle the
279 common cases. */
280 #if SIZEOF_SIZE_T == SIZEOF_LONG
281 sprintf(s, "%ld", va_arg(vargs, long));
282 #elif defined(MS_WINDOWS)
283 sprintf(s, "%Id", va_arg(vargs, size_t));
284 #else
285 #error Cannot print size_t values
286 #endif
287 }
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
289 sprintf(s, "%d", va_arg(vargs, int));
290 s += strlen(s);
291 break;
292 case 'i':
293 sprintf(s, "%i", va_arg(vargs, int));
294 s += strlen(s);
295 break;
296 case 'x':
297 sprintf(s, "%x", va_arg(vargs, int));
298 s += strlen(s);
299 break;
300 case 's':
301 p = va_arg(vargs, char*);
302 i = strlen(p);
303 if (n > 0 && i > n)
304 i = n;
305 memcpy(s, p, i);
306 s += i;
307 break;
308 case 'p':
309 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000310 /* %p is ill-defined: ensure leading 0x. */
311 if (s[1] == 'X')
312 s[1] = 'x';
313 else if (s[1] != 'x') {
314 memmove(s+2, s, strlen(s)+1);
315 s[0] = '0';
316 s[1] = 'x';
317 }
Barry Warsawdadace02001-08-24 18:32:06 +0000318 s += strlen(s);
319 break;
320 case '%':
321 *s++ = '%';
322 break;
323 default:
324 strcpy(s, p);
325 s += strlen(s);
326 goto end;
327 }
328 } else
329 *s++ = *f;
330 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000331
Barry Warsawdadace02001-08-24 18:32:06 +0000332 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000333 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000334 return string;
335}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000336
Barry Warsawdadace02001-08-24 18:32:06 +0000337PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000338PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000339{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000340 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000341 va_list vargs;
342
343#ifdef HAVE_STDARG_PROTOTYPES
344 va_start(vargs, format);
345#else
346 va_start(vargs);
347#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000348 ret = PyString_FromFormatV(format, vargs);
349 va_end(vargs);
350 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000351}
352
353
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000355 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356 const char *encoding,
357 const char *errors)
358{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000359 PyObject *v, *str;
360
361 str = PyString_FromStringAndSize(s, size);
362 if (str == NULL)
363 return NULL;
364 v = PyString_AsDecodedString(str, encoding, errors);
365 Py_DECREF(str);
366 return v;
367}
368
369PyObject *PyString_AsDecodedObject(PyObject *str,
370 const char *encoding,
371 const char *errors)
372{
373 PyObject *v;
374
375 if (!PyString_Check(str)) {
376 PyErr_BadArgument();
377 goto onError;
378 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000379
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000380 if (encoding == NULL) {
381#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000382 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000383#else
384 PyErr_SetString(PyExc_ValueError, "no encoding specified");
385 goto onError;
386#endif
387 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388
389 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000390 v = PyCodec_Decode(str, encoding, errors);
391 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000392 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000393
394 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000395
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000396 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000397 return NULL;
398}
399
400PyObject *PyString_AsDecodedString(PyObject *str,
401 const char *encoding,
402 const char *errors)
403{
404 PyObject *v;
405
406 v = PyString_AsDecodedObject(str, encoding, errors);
407 if (v == NULL)
408 goto onError;
409
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000410#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411 /* Convert Unicode to a string using the default encoding */
412 if (PyUnicode_Check(v)) {
413 PyObject *temp = v;
414 v = PyUnicode_AsEncodedString(v, NULL, NULL);
415 Py_DECREF(temp);
416 if (v == NULL)
417 goto onError;
418 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000419#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000420 if (!PyString_Check(v)) {
421 PyErr_Format(PyExc_TypeError,
422 "decoder did not return a string object (type=%.400s)",
423 v->ob_type->tp_name);
424 Py_DECREF(v);
425 goto onError;
426 }
427
428 return v;
429
430 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000431 return NULL;
432}
433
434PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000435 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436 const char *encoding,
437 const char *errors)
438{
439 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000440
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 str = PyString_FromStringAndSize(s, size);
442 if (str == NULL)
443 return NULL;
444 v = PyString_AsEncodedString(str, encoding, errors);
445 Py_DECREF(str);
446 return v;
447}
448
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000449PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000450 const char *encoding,
451 const char *errors)
452{
453 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000454
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000455 if (!PyString_Check(str)) {
456 PyErr_BadArgument();
457 goto onError;
458 }
459
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000460 if (encoding == NULL) {
461#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000463#else
464 PyErr_SetString(PyExc_ValueError, "no encoding specified");
465 goto onError;
466#endif
467 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468
469 /* Encode via the codec registry */
470 v = PyCodec_Encode(str, encoding, errors);
471 if (v == NULL)
472 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000473
474 return v;
475
476 onError:
477 return NULL;
478}
479
480PyObject *PyString_AsEncodedString(PyObject *str,
481 const char *encoding,
482 const char *errors)
483{
484 PyObject *v;
485
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000486 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000487 if (v == NULL)
488 goto onError;
489
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000490#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 /* Convert Unicode to a string using the default encoding */
492 if (PyUnicode_Check(v)) {
493 PyObject *temp = v;
494 v = PyUnicode_AsEncodedString(v, NULL, NULL);
495 Py_DECREF(temp);
496 if (v == NULL)
497 goto onError;
498 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000499#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000500 if (!PyString_Check(v)) {
501 PyErr_Format(PyExc_TypeError,
502 "encoder did not return a string object (type=%.400s)",
503 v->ob_type->tp_name);
504 Py_DECREF(v);
505 goto onError;
506 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000507
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000508 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000509
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000510 onError:
511 return NULL;
512}
513
Guido van Rossum234f9421993-06-17 12:35:49 +0000514static void
Fred Drakeba096332000-07-09 07:04:36 +0000515string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000516{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000517 switch (PyString_CHECK_INTERNED(op)) {
518 case SSTATE_NOT_INTERNED:
519 break;
520
521 case SSTATE_INTERNED_MORTAL:
522 /* revive dead object temporarily for DelItem */
523 op->ob_refcnt = 3;
524 if (PyDict_DelItem(interned, op) != 0)
525 Py_FatalError(
526 "deletion of interned string failed");
527 break;
528
529 case SSTATE_INTERNED_IMMORTAL:
530 Py_FatalError("Immortal interned string died.");
531
532 default:
533 Py_FatalError("Inconsistent interned string state.");
534 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000535 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000536}
537
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538/* Unescape a backslash-escaped string. If unicode is non-zero,
539 the string is a u-literal. If recode_encoding is non-zero,
540 the string is UTF-8 encoded and should be re-encoded in the
541 specified encoding. */
542
543PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000544 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000545 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000546 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000547 const char *recode_encoding)
548{
549 int c;
550 char *p, *buf;
551 const char *end;
552 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000553 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000554 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000555 if (v == NULL)
556 return NULL;
557 p = buf = PyString_AsString(v);
558 end = s + len;
559 while (s < end) {
560 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000561 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000562#ifdef Py_USING_UNICODE
563 if (recode_encoding && (*s & 0x80)) {
564 PyObject *u, *w;
565 char *r;
566 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000567 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000568 t = s;
569 /* Decode non-ASCII bytes as UTF-8. */
570 while (t < end && (*t & 0x80)) t++;
571 u = PyUnicode_DecodeUTF8(s, t - s, errors);
572 if(!u) goto failed;
573
574 /* Recode them in target encoding. */
575 w = PyUnicode_AsEncodedString(
576 u, recode_encoding, errors);
577 Py_DECREF(u);
578 if (!w) goto failed;
579
580 /* Append bytes to output buffer. */
581 r = PyString_AsString(w);
582 rn = PyString_Size(w);
583 memcpy(p, r, rn);
584 p += rn;
585 Py_DECREF(w);
586 s = t;
587 } else {
588 *p++ = *s++;
589 }
590#else
591 *p++ = *s++;
592#endif
593 continue;
594 }
595 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000596 if (s==end) {
597 PyErr_SetString(PyExc_ValueError,
598 "Trailing \\ in string");
599 goto failed;
600 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000601 switch (*s++) {
602 /* XXX This assumes ASCII! */
603 case '\n': break;
604 case '\\': *p++ = '\\'; break;
605 case '\'': *p++ = '\''; break;
606 case '\"': *p++ = '\"'; break;
607 case 'b': *p++ = '\b'; break;
608 case 'f': *p++ = '\014'; break; /* FF */
609 case 't': *p++ = '\t'; break;
610 case 'n': *p++ = '\n'; break;
611 case 'r': *p++ = '\r'; break;
612 case 'v': *p++ = '\013'; break; /* VT */
613 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
614 case '0': case '1': case '2': case '3':
615 case '4': case '5': case '6': case '7':
616 c = s[-1] - '0';
617 if ('0' <= *s && *s <= '7') {
618 c = (c<<3) + *s++ - '0';
619 if ('0' <= *s && *s <= '7')
620 c = (c<<3) + *s++ - '0';
621 }
622 *p++ = c;
623 break;
624 case 'x':
625 if (isxdigit(Py_CHARMASK(s[0]))
626 && isxdigit(Py_CHARMASK(s[1]))) {
627 unsigned int x = 0;
628 c = Py_CHARMASK(*s);
629 s++;
630 if (isdigit(c))
631 x = c - '0';
632 else if (islower(c))
633 x = 10 + c - 'a';
634 else
635 x = 10 + c - 'A';
636 x = x << 4;
637 c = Py_CHARMASK(*s);
638 s++;
639 if (isdigit(c))
640 x += c - '0';
641 else if (islower(c))
642 x += 10 + c - 'a';
643 else
644 x += 10 + c - 'A';
645 *p++ = x;
646 break;
647 }
648 if (!errors || strcmp(errors, "strict") == 0) {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000649 PyErr_SetString(PyExc_ValueError,
650 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000651 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 }
653 if (strcmp(errors, "replace") == 0) {
654 *p++ = '?';
655 } else if (strcmp(errors, "ignore") == 0)
656 /* do nothing */;
657 else {
658 PyErr_Format(PyExc_ValueError,
659 "decoding error; "
660 "unknown error handling code: %.400s",
661 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000662 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000663 }
664#ifndef Py_USING_UNICODE
665 case 'u':
666 case 'U':
667 case 'N':
668 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000669 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000670 "Unicode escapes not legal "
671 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000672 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000673 }
674#endif
675 default:
676 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000677 s--;
678 goto non_esc; /* an arbitry number of unescaped
679 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000680 }
681 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000682 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000683 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 return v;
685 failed:
686 Py_DECREF(v);
687 return NULL;
688}
689
Martin v. Löwis18e16552006-02-15 17:27:45 +0000690static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000691string_getsize(register PyObject *op)
692{
693 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000694 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000695 if (PyString_AsStringAndSize(op, &s, &len))
696 return -1;
697 return len;
698}
699
700static /*const*/ char *
701string_getbuffer(register PyObject *op)
702{
703 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000704 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705 if (PyString_AsStringAndSize(op, &s, &len))
706 return NULL;
707 return s;
708}
709
Martin v. Löwis18e16552006-02-15 17:27:45 +0000710Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000711PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000713 if (!PyString_Check(op))
714 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000715 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716}
717
718/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000719PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000720{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721 if (!PyString_Check(op))
722 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000723 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724}
725
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726int
727PyString_AsStringAndSize(register PyObject *obj,
728 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000729 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730{
731 if (s == NULL) {
732 PyErr_BadInternalCall();
733 return -1;
734 }
735
736 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000737#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000738 if (PyUnicode_Check(obj)) {
739 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
740 if (obj == NULL)
741 return -1;
742 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000743 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#endif
745 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000746 PyErr_Format(PyExc_TypeError,
747 "expected string or Unicode object, "
748 "%.200s found", obj->ob_type->tp_name);
749 return -1;
750 }
751 }
752
753 *s = PyString_AS_STRING(obj);
754 if (len != NULL)
755 *len = PyString_GET_SIZE(obj);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000756 else if (strlen(*s) != PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000757 PyErr_SetString(PyExc_TypeError,
758 "expected string without null bytes");
759 return -1;
760 }
761 return 0;
762}
763
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000764/* Methods */
765
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000766static int
Fred Drakeba096332000-07-09 07:04:36 +0000767string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000768{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000769 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000771 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000772
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000773 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000774 if (! PyString_CheckExact(op)) {
775 int ret;
776 /* A str subclass may have its own __str__ method. */
777 op = (PyStringObject *) PyObject_Str((PyObject *)op);
778 if (op == NULL)
779 return -1;
780 ret = string_print(op, fp, flags);
781 Py_DECREF(op);
782 return ret;
783 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000784 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000785#ifdef __VMS
786 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
787#else
788 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
789#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000792
Thomas Wouters7e474022000-07-16 12:04:32 +0000793 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000795 if (memchr(op->ob_sval, '\'', op->ob_size) &&
796 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000797 quote = '"';
798
799 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800 for (i = 0; i < op->ob_size; i++) {
801 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000802 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000803 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000804 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000805 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000806 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000807 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000808 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000809 fprintf(fp, "\\r");
810 else if (c < ' ' || c >= 0x7f)
811 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000812 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000813 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000814 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000815 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000816 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000817}
818
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000819PyObject *
820PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000821{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000822 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000823 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000824 PyObject *v;
825 if (newsize > INT_MAX) {
826 PyErr_SetString(PyExc_OverflowError,
827 "string is too large to make repr");
828 }
829 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000831 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 }
833 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000834 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000835 register char c;
836 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 int quote;
838
Thomas Wouters7e474022000-07-16 12:04:32 +0000839 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000840 quote = '\'';
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841 if (smartquotes &&
842 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000843 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000844 quote = '"';
845
Tim Peters9161c8b2001-12-03 01:55:38 +0000846 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000847 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000849 /* There's at least enough room for a hex escape
850 and a closing quote. */
851 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000853 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000855 else if (c == '\t')
856 *p++ = '\\', *p++ = 't';
857 else if (c == '\n')
858 *p++ = '\\', *p++ = 'n';
859 else if (c == '\r')
860 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000861 else if (c < ' ' || c >= 0x7f) {
862 /* For performance, we don't want to call
863 PyOS_snprintf here (extra layers of
864 function call). */
865 sprintf(p, "\\x%02x", c & 0xff);
866 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000867 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000868 else
869 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000871 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000872 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000873 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000874 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000875 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000876 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000878}
879
Guido van Rossum189f1df2001-05-01 16:51:53 +0000880static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000881string_repr(PyObject *op)
882{
883 return PyString_Repr(op, 1);
884}
885
886static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000887string_str(PyObject *s)
888{
Tim Petersc9933152001-10-16 20:18:24 +0000889 assert(PyString_Check(s));
890 if (PyString_CheckExact(s)) {
891 Py_INCREF(s);
892 return s;
893 }
894 else {
895 /* Subtype -- return genuine string with the same value. */
896 PyStringObject *t = (PyStringObject *) s;
897 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
898 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000899}
900
Martin v. Löwis18e16552006-02-15 17:27:45 +0000901static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000902string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000903{
904 return a->ob_size;
905}
906
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000907static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000908string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000909{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000910 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000911 register PyStringObject *op;
912 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000913#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000914 if (PyUnicode_Check(bb))
915 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000916#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000917 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000918 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000919 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920 return NULL;
921 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000924 if ((a->ob_size == 0 || b->ob_size == 0) &&
925 PyString_CheckExact(a) && PyString_CheckExact(b)) {
926 if (a->ob_size == 0) {
927 Py_INCREF(bb);
928 return bb;
929 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 Py_INCREF(a);
931 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000932 }
933 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000934 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000935 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000936 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000937 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000938 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000939 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000940 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000941 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000942 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
943 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000944 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000946#undef b
947}
948
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000949static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000950string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000951{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000952 register Py_ssize_t i;
953 register Py_ssize_t j;
954 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000955 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000956 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000957 if (n < 0)
958 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000959 /* watch out for overflows: the size can overflow int,
960 * and the # of bytes needed can overflow size_t
961 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000962 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000963 if (n && size / n != a->ob_size) {
964 PyErr_SetString(PyExc_OverflowError,
965 "repeated string is too long");
966 return NULL;
967 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000968 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969 Py_INCREF(a);
970 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971 }
Tim Peterse7c05322004-06-27 17:24:49 +0000972 nbytes = (size_t)size;
973 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000974 PyErr_SetString(PyExc_OverflowError,
975 "repeated string is too long");
976 return NULL;
977 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000978 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000979 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000980 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000981 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000982 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000983 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000984 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000985 op->ob_sval[size] = '\0';
986 if (a->ob_size == 1 && n > 0) {
987 memset(op->ob_sval, a->ob_sval[0] , n);
988 return (PyObject *) op;
989 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000990 i = 0;
991 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000992 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
993 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000994 }
995 while (i < size) {
996 j = (i <= size-i) ? i : size-i;
997 memcpy(op->ob_sval+i, op->ob_sval, j);
998 i += j;
999 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001000 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001}
1002
1003/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001006string_slice(register PyStringObject *a, register Py_ssize_t i,
1007 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001008 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001009{
1010 if (i < 0)
1011 i = 0;
1012 if (j < 0)
1013 j = 0; /* Avoid signed/unsigned bug in next line */
1014 if (j > a->ob_size)
1015 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001016 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1017 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001018 Py_INCREF(a);
1019 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001020 }
1021 if (j < i)
1022 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001023 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001024}
1025
Guido van Rossum9284a572000-03-07 15:53:43 +00001026static int
Fred Drakeba096332000-07-09 07:04:36 +00001027string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001028{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001029 char *s = PyString_AS_STRING(a);
1030 const char *sub = PyString_AS_STRING(el);
1031 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001032 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001033 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001034 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001035
1036 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001037#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001038 if (PyUnicode_Check(el))
1039 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001040#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001041 if (!PyString_Check(el)) {
1042 PyErr_SetString(PyExc_TypeError,
1043 "'in <string>' requires string as left operand");
1044 return -1;
1045 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001046 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001047
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001048 if (len_sub == 0)
1049 return 1;
1050 /* last points to one char beyond the start of the rightmost
1051 substring. When s<last, there is still room for a possible match
1052 and s[0] through s[len_sub-1] will be in bounds.
1053 shortsub is len_sub minus the last character which is checked
1054 separately just before the memcmp(). That check helps prevent
1055 false starts and saves the setup time for memcmp().
1056 */
1057 firstchar = sub[0];
1058 shortsub = len_sub - 1;
1059 lastchar = sub[shortsub];
1060 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1061 while (s < last) {
1062 s = memchr(s, firstchar, last-s);
1063 if (s == NULL)
1064 return 0;
1065 assert(s < last);
1066 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001067 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001068 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001069 }
1070 return 0;
1071}
1072
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001073static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001074string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001075{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001076 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001077 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001079 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001080 return NULL;
1081 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001082 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001083 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001084 if (v == NULL)
1085 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001086 else {
1087#ifdef COUNT_ALLOCS
1088 one_strings++;
1089#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001090 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001091 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001092 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093}
1094
Martin v. Löwiscd353062001-05-24 16:56:35 +00001095static PyObject*
1096string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001098 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001099 Py_ssize_t len_a, len_b;
1100 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001101 PyObject *result;
1102
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001103 /* Make sure both arguments are strings. */
1104 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001105 result = Py_NotImplemented;
1106 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001107 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108 if (a == b) {
1109 switch (op) {
1110 case Py_EQ:case Py_LE:case Py_GE:
1111 result = Py_True;
1112 goto out;
1113 case Py_NE:case Py_LT:case Py_GT:
1114 result = Py_False;
1115 goto out;
1116 }
1117 }
1118 if (op == Py_EQ) {
1119 /* Supporting Py_NE here as well does not save
1120 much time, since Py_NE is rarely used. */
1121 if (a->ob_size == b->ob_size
1122 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001123 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001124 a->ob_size) == 0)) {
1125 result = Py_True;
1126 } else {
1127 result = Py_False;
1128 }
1129 goto out;
1130 }
1131 len_a = a->ob_size; len_b = b->ob_size;
1132 min_len = (len_a < len_b) ? len_a : len_b;
1133 if (min_len > 0) {
1134 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1135 if (c==0)
1136 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1137 }else
1138 c = 0;
1139 if (c == 0)
1140 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1141 switch (op) {
1142 case Py_LT: c = c < 0; break;
1143 case Py_LE: c = c <= 0; break;
1144 case Py_EQ: assert(0); break; /* unreachable */
1145 case Py_NE: c = c != 0; break;
1146 case Py_GT: c = c > 0; break;
1147 case Py_GE: c = c >= 0; break;
1148 default:
1149 result = Py_NotImplemented;
1150 goto out;
1151 }
1152 result = c ? Py_True : Py_False;
1153 out:
1154 Py_INCREF(result);
1155 return result;
1156}
1157
1158int
1159_PyString_Eq(PyObject *o1, PyObject *o2)
1160{
1161 PyStringObject *a, *b;
1162 a = (PyStringObject*)o1;
1163 b = (PyStringObject*)o2;
1164 return a->ob_size == b->ob_size
1165 && *a->ob_sval == *b->ob_sval
1166 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001167}
1168
Guido van Rossum9bfef441993-03-29 10:43:31 +00001169static long
Fred Drakeba096332000-07-09 07:04:36 +00001170string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001171{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001172 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001173 register unsigned char *p;
1174 register long x;
1175
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001176 if (a->ob_shash != -1)
1177 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001178 len = a->ob_size;
1179 p = (unsigned char *) a->ob_sval;
1180 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001181 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001182 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001183 x ^= a->ob_size;
1184 if (x == -1)
1185 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001186 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001187 return x;
1188}
1189
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001190static PyObject*
1191string_subscript(PyStringObject* self, PyObject* item)
1192{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001193 if (PyInt_Check(item) || PyLong_Check(item)) {
1194 Py_ssize_t i = PyInt_AsSsize_t(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001195 if (i == -1 && PyErr_Occurred())
1196 return NULL;
1197 if (i < 0)
1198 i += PyString_GET_SIZE(self);
1199 return string_item(self,i);
1200 }
1201 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001202 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001203 char* source_buf;
1204 char* result_buf;
1205 PyObject* result;
1206
1207 if (PySlice_GetIndicesEx((PySliceObject*)item,
1208 PyString_GET_SIZE(self),
1209 &start, &stop, &step, &slicelength) < 0) {
1210 return NULL;
1211 }
1212
1213 if (slicelength <= 0) {
1214 return PyString_FromStringAndSize("", 0);
1215 }
1216 else {
1217 source_buf = PyString_AsString((PyObject*)self);
1218 result_buf = PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001219 if (result_buf == NULL)
1220 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001221
1222 for (cur = start, i = 0; i < slicelength;
1223 cur += step, i++) {
1224 result_buf[i] = source_buf[cur];
1225 }
1226
1227 result = PyString_FromStringAndSize(result_buf,
1228 slicelength);
1229 PyMem_Free(result_buf);
1230 return result;
1231 }
1232 }
1233 else {
1234 PyErr_SetString(PyExc_TypeError,
1235 "string indices must be integers");
1236 return NULL;
1237 }
1238}
1239
Martin v. Löwis18e16552006-02-15 17:27:45 +00001240static Py_ssize_t
1241string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001242{
1243 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001244 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001245 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001246 return -1;
1247 }
1248 *ptr = (void *)self->ob_sval;
1249 return self->ob_size;
1250}
1251
Martin v. Löwis18e16552006-02-15 17:27:45 +00001252static Py_ssize_t
1253string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254{
Guido van Rossum045e6881997-09-08 18:30:11 +00001255 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001256 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257 return -1;
1258}
1259
Martin v. Löwis18e16552006-02-15 17:27:45 +00001260static Py_ssize_t
1261string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001262{
1263 if ( lenp )
1264 *lenp = self->ob_size;
1265 return 1;
1266}
1267
Martin v. Löwis18e16552006-02-15 17:27:45 +00001268static Py_ssize_t
1269string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001270{
1271 if ( index != 0 ) {
1272 PyErr_SetString(PyExc_SystemError,
1273 "accessing non-existent string segment");
1274 return -1;
1275 }
1276 *ptr = self->ob_sval;
1277 return self->ob_size;
1278}
1279
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001280static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001282 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001283 (ssizeargfunc)string_repeat, /*sq_repeat*/
1284 (ssizeargfunc)string_item, /*sq_item*/
1285 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001286 0, /*sq_ass_item*/
1287 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001288 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001289};
1290
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001291static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001293 (binaryfunc)string_subscript,
1294 0,
1295};
1296
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001298 (readbufferproc)string_buffer_getreadbuf,
1299 (writebufferproc)string_buffer_getwritebuf,
1300 (segcountproc)string_buffer_getsegcount,
1301 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001302};
1303
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304
1305
/* Strip-kind codes; they double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Bare method name for strip kind i: the format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001314
/* Helpers for the split functions.  Both build a string from
   data[left:right] and add it to the list being built: SPLIT_APPEND at
   the end, SPLIT_INSERT at the front.

   They rely on the enclosing function providing a `PyObject *str'
   scratch variable, the result list in `list', and an `onError' label,
   which is jumped to when string creation or the list operation fails.

   Each is wrapped in do { } while (0) so that it expands to exactly one
   statement; invoke it with a trailing semicolon. */
#define SPLIT_APPEND(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)

#define SPLIT_INSERT(data, left, right)                         \
    do {                                                        \
        str = PyString_FromStringAndSize((data) + (left),       \
                                         (right) - (left));     \
        if (str == NULL)                                        \
            goto onError;                                       \
        if (PyList_Insert(list, 0, str)) {                      \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        Py_DECREF(str);                                         \
    } while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338
1339static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001340split_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001341{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001342 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001343 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001344 PyObject *list = PyList_New(0);
1345
1346 if (list == NULL)
1347 return NULL;
1348
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 for (i = j = 0; i < len; ) {
1350 while (i < len && isspace(Py_CHARMASK(s[i])))
1351 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353 while (i < len && !isspace(Py_CHARMASK(s[i])))
1354 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001355 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001356 if (maxsplit-- <= 0)
1357 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001358 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 while (i < len && isspace(Py_CHARMASK(s[i])))
1360 i++;
1361 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362 }
1363 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001364 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001365 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001366 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001368 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369 Py_DECREF(list);
1370 return NULL;
1371}
1372
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001373static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001374split_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001375{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001376 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001377 PyObject *str;
1378 PyObject *list = PyList_New(0);
1379
1380 if (list == NULL)
1381 return NULL;
1382
1383 for (i = j = 0; i < len; ) {
1384 if (s[i] == ch) {
1385 if (maxcount-- <= 0)
1386 break;
1387 SPLIT_APPEND(s, j, i);
1388 i = j = i + 1;
1389 } else
1390 i++;
1391 }
1392 if (j <= len) {
1393 SPLIT_APPEND(s, j, len);
1394 }
1395 return list;
1396
1397 onError:
1398 Py_DECREF(list);
1399 return NULL;
1400}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001402PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403"S.split([sep [,maxsplit]]) -> list of strings\n\
1404\n\
1405Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001406delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001407splits are done. If sep is not specified or is None, any\n\
1408whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001409
1410static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001411string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001413 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1414 int err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001415 int maxsplit = -1;
1416 const char *s = PyString_AS_STRING(self), *sub;
1417 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418
Guido van Rossum4c08d552000-03-10 22:55:18 +00001419 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001421 if (maxsplit < 0)
1422 maxsplit = INT_MAX;
1423 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001425 if (PyString_Check(subobj)) {
1426 sub = PyString_AS_STRING(subobj);
1427 n = PyString_GET_SIZE(subobj);
1428 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001429#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001430 else if (PyUnicode_Check(subobj))
1431 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001432#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001433 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1434 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001435
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436 if (n == 0) {
1437 PyErr_SetString(PyExc_ValueError, "empty separator");
1438 return NULL;
1439 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440 else if (n == 1)
1441 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001442
1443 list = PyList_New(0);
1444 if (list == NULL)
1445 return NULL;
1446
1447 i = j = 0;
1448 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001449 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001450 if (maxsplit-- <= 0)
1451 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001452 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001453 if (item == NULL)
1454 goto fail;
1455 err = PyList_Append(list, item);
1456 Py_DECREF(item);
1457 if (err < 0)
1458 goto fail;
1459 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 }
1461 else
1462 i++;
1463 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001464 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001465 if (item == NULL)
1466 goto fail;
1467 err = PyList_Append(list, item);
1468 Py_DECREF(item);
1469 if (err < 0)
1470 goto fail;
1471
1472 return list;
1473
1474 fail:
1475 Py_DECREF(list);
1476 return NULL;
1477}
1478
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001479static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001480rsplit_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001481{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001482 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001483 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001484 PyObject *list = PyList_New(0);
1485
1486 if (list == NULL)
1487 return NULL;
1488
1489 for (i = j = len - 1; i >= 0; ) {
1490 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1491 i--;
1492 j = i;
1493 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1494 i--;
1495 if (j > i) {
1496 if (maxsplit-- <= 0)
1497 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001498 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001499 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1500 i--;
1501 j = i;
1502 }
1503 }
1504 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001505 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001506 }
1507 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001508 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001509 Py_DECREF(list);
1510 return NULL;
1511}
1512
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001513static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001514rsplit_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001515{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001516 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001517 PyObject *str;
1518 PyObject *list = PyList_New(0);
1519
1520 if (list == NULL)
1521 return NULL;
1522
1523 for (i = j = len - 1; i >= 0; ) {
1524 if (s[i] == ch) {
1525 if (maxcount-- <= 0)
1526 break;
1527 SPLIT_INSERT(s, i + 1, j + 1);
1528 j = i = i - 1;
1529 } else
1530 i--;
1531 }
1532 if (j >= -1) {
1533 SPLIT_INSERT(s, 0, j + 1);
1534 }
1535 return list;
1536
1537 onError:
1538 Py_DECREF(list);
1539 return NULL;
1540}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001541
1542PyDoc_STRVAR(rsplit__doc__,
1543"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1544\n\
1545Return a list of the words in the string S, using sep as the\n\
1546delimiter string, starting at the end of the string and working\n\
1547to the front. If maxsplit is given, at most maxsplit splits are\n\
1548done. If sep is not specified or is None, any whitespace string\n\
1549is a separator.");
1550
1551static PyObject *
1552string_rsplit(PyStringObject *self, PyObject *args)
1553{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001554 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1555 int err;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001556 int maxsplit = -1;
1557 const char *s = PyString_AS_STRING(self), *sub;
1558 PyObject *list, *item, *subobj = Py_None;
1559
1560 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1561 return NULL;
1562 if (maxsplit < 0)
1563 maxsplit = INT_MAX;
1564 if (subobj == Py_None)
1565 return rsplit_whitespace(s, len, maxsplit);
1566 if (PyString_Check(subobj)) {
1567 sub = PyString_AS_STRING(subobj);
1568 n = PyString_GET_SIZE(subobj);
1569 }
1570#ifdef Py_USING_UNICODE
1571 else if (PyUnicode_Check(subobj))
1572 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1573#endif
1574 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1575 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001576
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001577 if (n == 0) {
1578 PyErr_SetString(PyExc_ValueError, "empty separator");
1579 return NULL;
1580 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001581 else if (n == 1)
1582 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001583
1584 list = PyList_New(0);
1585 if (list == NULL)
1586 return NULL;
1587
1588 j = len;
1589 i = j - n;
1590 while (i >= 0) {
1591 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1592 if (maxsplit-- <= 0)
1593 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001594 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001595 if (item == NULL)
1596 goto fail;
1597 err = PyList_Insert(list, 0, item);
1598 Py_DECREF(item);
1599 if (err < 0)
1600 goto fail;
1601 j = i;
1602 i -= n;
1603 }
1604 else
1605 i--;
1606 }
1607 item = PyString_FromStringAndSize(s, j);
1608 if (item == NULL)
1609 goto fail;
1610 err = PyList_Insert(list, 0, item);
1611 Py_DECREF(item);
1612 if (err < 0)
1613 goto fail;
1614
1615 return list;
1616
1617 fail:
1618 Py_DECREF(list);
1619 return NULL;
1620}
1621
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001623PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001624"S.join(sequence) -> string\n\
1625\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001626Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001627sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628
1629static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001630string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001631{
1632 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001633 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001636 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001637 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001638 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001639 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640
Tim Peters19fe14e2001-01-19 03:03:47 +00001641 seq = PySequence_Fast(orig, "");
1642 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001643 return NULL;
1644 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001645
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001646 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001647 if (seqlen == 0) {
1648 Py_DECREF(seq);
1649 return PyString_FromString("");
1650 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001652 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001653 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1654 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001655 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001656 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001657 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001659
Raymond Hettinger674f2412004-08-23 23:23:54 +00001660 /* There are at least two things to join, or else we have a subclass
1661 * of the builtin types in the sequence.
1662 * Do a pre-pass to figure out the total amount of space we'll
1663 * need (sz), see whether any argument is absurd, and defer to
1664 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001665 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001666 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001667 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001668 item = PySequence_Fast_GET_ITEM(seq, i);
1669 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001670#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001671 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001672 /* Defer to Unicode join.
1673 * CAUTION: There's no gurantee that the
1674 * original sequence can be iterated over
1675 * again, so we must pass seq here.
1676 */
1677 PyObject *result;
1678 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001679 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001680 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001681 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001682#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001683 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001684 "sequence item %i: expected string,"
1685 " %.80s found",
Martin v. Löwis18e16552006-02-15 17:27:45 +00001686 /*XXX*/(int)i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001687 Py_DECREF(seq);
1688 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001689 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001690 sz += PyString_GET_SIZE(item);
1691 if (i != 0)
1692 sz += seplen;
1693 if (sz < old_sz || sz > INT_MAX) {
1694 PyErr_SetString(PyExc_OverflowError,
1695 "join() is too long for a Python string");
1696 Py_DECREF(seq);
1697 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001698 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001699 }
1700
1701 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001702 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001703 if (res == NULL) {
1704 Py_DECREF(seq);
1705 return NULL;
1706 }
1707
1708 /* Catenate everything. */
1709 p = PyString_AS_STRING(res);
1710 for (i = 0; i < seqlen; ++i) {
1711 size_t n;
1712 item = PySequence_Fast_GET_ITEM(seq, i);
1713 n = PyString_GET_SIZE(item);
1714 memcpy(p, PyString_AS_STRING(item), n);
1715 p += n;
1716 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001717 memcpy(p, sep, seplen);
1718 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001719 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001721
Jeremy Hylton49048292000-07-11 03:28:17 +00001722 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724}
1725
Tim Peters52e155e2001-06-16 05:42:57 +00001726PyObject *
1727_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001728{
Tim Petersa7259592001-06-16 05:11:17 +00001729 assert(sep != NULL && PyString_Check(sep));
1730 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001731 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001732}
1733
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001734static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001735string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001736{
1737 if (*end > len)
1738 *end = len;
1739 else if (*end < 0)
1740 *end += len;
1741 if (*end < 0)
1742 *end = 0;
1743 if (*start < 0)
1744 *start += len;
1745 if (*start < 0)
1746 *start = 0;
1747}
1748
Martin v. Löwis18e16552006-02-15 17:27:45 +00001749static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001750string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001753 Py_ssize_t len = PyString_GET_SIZE(self);
1754 Py_ssize_t n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756
Martin v. Löwis18e16552006-02-15 17:27:45 +00001757 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001758 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001759 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 return -2;
1761 if (PyString_Check(subobj)) {
1762 sub = PyString_AS_STRING(subobj);
1763 n = PyString_GET_SIZE(subobj);
1764 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001765#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001766 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001767 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001768#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001769 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770 return -2;
1771
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001772 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773
Guido van Rossum4c08d552000-03-10 22:55:18 +00001774 if (dir > 0) {
1775 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001777 last -= n;
1778 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001779 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001780 return (long)i;
1781 }
1782 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001783 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001784
Guido van Rossum4c08d552000-03-10 22:55:18 +00001785 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001786 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001787 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001788 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001789 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001790 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001791
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792 return -1;
1793}
1794
1795
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001796PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797"S.find(sub [,start [,end]]) -> int\n\
1798\n\
1799Return the lowest index in S where substring sub is found,\n\
1800such that sub is contained within s[start,end]. Optional\n\
1801arguments start and end are interpreted as in slice notation.\n\
1802\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001803Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804
1805static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001806string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001808 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809 if (result == -2)
1810 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001811 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812}
1813
1814
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001815PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816"S.index(sub [,start [,end]]) -> int\n\
1817\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001818Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819
1820static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001821string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001823 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824 if (result == -2)
1825 return NULL;
1826 if (result == -1) {
1827 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001828 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829 return NULL;
1830 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001831 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832}
1833
1834
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001835PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836"S.rfind(sub [,start [,end]]) -> int\n\
1837\n\
1838Return the highest index in S where substring sub is found,\n\
1839such that sub is contained within s[start,end]. Optional\n\
1840arguments start and end are interpreted as in slice notation.\n\
1841\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001842Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843
1844static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001845string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001847 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848 if (result == -2)
1849 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001850 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851}
1852
1853
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001854PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855"S.rindex(sub [,start [,end]]) -> int\n\
1856\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001857Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858
1859static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001860string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001861{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001862 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863 if (result == -2)
1864 return NULL;
1865 if (result == -1) {
1866 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001867 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868 return NULL;
1869 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001870 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001871}
1872
1873
1874static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001875do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1876{
1877 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001878 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001879 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001880 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1881 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001882
1883 i = 0;
1884 if (striptype != RIGHTSTRIP) {
1885 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1886 i++;
1887 }
1888 }
1889
1890 j = len;
1891 if (striptype != LEFTSTRIP) {
1892 do {
1893 j--;
1894 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1895 j++;
1896 }
1897
1898 if (i == 0 && j == len && PyString_CheckExact(self)) {
1899 Py_INCREF(self);
1900 return (PyObject*)self;
1901 }
1902 else
1903 return PyString_FromStringAndSize(s+i, j-i);
1904}
1905
1906
1907static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001908do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909{
1910 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001911 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913 i = 0;
1914 if (striptype != RIGHTSTRIP) {
1915 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1916 i++;
1917 }
1918 }
1919
1920 j = len;
1921 if (striptype != LEFTSTRIP) {
1922 do {
1923 j--;
1924 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1925 j++;
1926 }
1927
Tim Peters8fa5dd02001-09-12 02:18:30 +00001928 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001929 Py_INCREF(self);
1930 return (PyObject*)self;
1931 }
1932 else
1933 return PyString_FromStringAndSize(s+i, j-i);
1934}
1935
1936
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001937static PyObject *
1938do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1939{
1940 PyObject *sep = NULL;
1941
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001942 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001943 return NULL;
1944
1945 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001946 if (PyString_Check(sep))
1947 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001948#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001949 else if (PyUnicode_Check(sep)) {
1950 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1951 PyObject *res;
1952 if (uniself==NULL)
1953 return NULL;
1954 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1955 striptype, sep);
1956 Py_DECREF(uniself);
1957 return res;
1958 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001959#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001960 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001961 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001962#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001963 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001964#else
1965 "%s arg must be None or str",
1966#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001967 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001968 return NULL;
1969 }
1970 return do_xstrip(self, striptype, sep);
1971 }
1972
1973 return do_strip(self, striptype);
1974}
1975
1976
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001977PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001978"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979\n\
1980Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001981whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001982If chars is given and not None, remove characters in chars instead.\n\
1983If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984
1985static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001986string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001988 if (PyTuple_GET_SIZE(args) == 0)
1989 return do_strip(self, BOTHSTRIP); /* Common case */
1990 else
1991 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992}
1993
1994
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001995PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001996"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001998Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001999If chars is given and not None, remove characters in chars instead.\n\
2000If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001
2002static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002003string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002005 if (PyTuple_GET_SIZE(args) == 0)
2006 return do_strip(self, LEFTSTRIP); /* Common case */
2007 else
2008 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009}
2010
2011
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002012PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002013"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002015Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002016If chars is given and not None, remove characters in chars instead.\n\
2017If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002018
2019static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002020string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002022 if (PyTuple_GET_SIZE(args) == 0)
2023 return do_strip(self, RIGHTSTRIP); /* Common case */
2024 else
2025 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026}
2027
2028
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002029PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002030"S.lower() -> string\n\
2031\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002032Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033
2034static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002035string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036{
2037 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002038 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039 PyObject *new;
2040
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041 new = PyString_FromStringAndSize(NULL, n);
2042 if (new == NULL)
2043 return NULL;
2044 s_new = PyString_AsString(new);
2045 for (i = 0; i < n; i++) {
2046 int c = Py_CHARMASK(*s++);
2047 if (isupper(c)) {
2048 *s_new = tolower(c);
2049 } else
2050 *s_new = c;
2051 s_new++;
2052 }
2053 return new;
2054}
2055
2056
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002057PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058"S.upper() -> string\n\
2059\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002060Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061
2062static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002063string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064{
2065 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002066 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002067 PyObject *new;
2068
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069 new = PyString_FromStringAndSize(NULL, n);
2070 if (new == NULL)
2071 return NULL;
2072 s_new = PyString_AsString(new);
2073 for (i = 0; i < n; i++) {
2074 int c = Py_CHARMASK(*s++);
2075 if (islower(c)) {
2076 *s_new = toupper(c);
2077 } else
2078 *s_new = c;
2079 s_new++;
2080 }
2081 return new;
2082}
2083
2084
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002085PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002086"S.title() -> string\n\
2087\n\
2088Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002089characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002090
2091static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002092string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002093{
2094 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002095 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096 int previous_is_cased = 0;
2097 PyObject *new;
2098
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099 new = PyString_FromStringAndSize(NULL, n);
2100 if (new == NULL)
2101 return NULL;
2102 s_new = PyString_AsString(new);
2103 for (i = 0; i < n; i++) {
2104 int c = Py_CHARMASK(*s++);
2105 if (islower(c)) {
2106 if (!previous_is_cased)
2107 c = toupper(c);
2108 previous_is_cased = 1;
2109 } else if (isupper(c)) {
2110 if (previous_is_cased)
2111 c = tolower(c);
2112 previous_is_cased = 1;
2113 } else
2114 previous_is_cased = 0;
2115 *s_new++ = c;
2116 }
2117 return new;
2118}
2119
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002120PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121"S.capitalize() -> string\n\
2122\n\
2123Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002124capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125
2126static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002127string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128{
2129 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002130 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131 PyObject *new;
2132
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133 new = PyString_FromStringAndSize(NULL, n);
2134 if (new == NULL)
2135 return NULL;
2136 s_new = PyString_AsString(new);
2137 if (0 < n) {
2138 int c = Py_CHARMASK(*s++);
2139 if (islower(c))
2140 *s_new = toupper(c);
2141 else
2142 *s_new = c;
2143 s_new++;
2144 }
2145 for (i = 1; i < n; i++) {
2146 int c = Py_CHARMASK(*s++);
2147 if (isupper(c))
2148 *s_new = tolower(c);
2149 else
2150 *s_new = c;
2151 s_new++;
2152 }
2153 return new;
2154}
2155
2156
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002157PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158"S.count(sub[, start[, end]]) -> int\n\
2159\n\
2160Return the number of occurrences of substring sub in string\n\
2161S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002162interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163
2164static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002165string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002167 const char *s = PyString_AS_STRING(self), *sub, *t;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002168 Py_ssize_t len = PyString_GET_SIZE(self), n;
2169 Py_ssize_t i = 0, last = INT_MAX;
2170 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172
Guido van Rossumc6821402000-05-08 14:08:05 +00002173 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2174 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002176
Guido van Rossum4c08d552000-03-10 22:55:18 +00002177 if (PyString_Check(subobj)) {
2178 sub = PyString_AS_STRING(subobj);
2179 n = PyString_GET_SIZE(subobj);
2180 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002181#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002182 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002183 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002184 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2185 if (count == -1)
2186 return NULL;
2187 else
2188 return PyInt_FromLong((long) count);
2189 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002190#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2192 return NULL;
2193
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002194 string_adjust_indices(&i, &last, len);
2195
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002196 m = last + 1 - n;
2197 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002198 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199
2200 r = 0;
2201 while (i < m) {
2202 if (!memcmp(s+i, sub, n)) {
2203 r++;
2204 i += n;
2205 } else {
2206 i++;
2207 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002208 if (i >= m)
2209 break;
2210 t = memchr(s+i, sub[0], m-i);
2211 if (t == NULL)
2212 break;
2213 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002215 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002216}
2217
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002218PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219"S.swapcase() -> string\n\
2220\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002221Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002222converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223
2224static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002225string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226{
2227 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002228 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229 PyObject *new;
2230
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002231 new = PyString_FromStringAndSize(NULL, n);
2232 if (new == NULL)
2233 return NULL;
2234 s_new = PyString_AsString(new);
2235 for (i = 0; i < n; i++) {
2236 int c = Py_CHARMASK(*s++);
2237 if (islower(c)) {
2238 *s_new = toupper(c);
2239 }
2240 else if (isupper(c)) {
2241 *s_new = tolower(c);
2242 }
2243 else
2244 *s_new = c;
2245 s_new++;
2246 }
2247 return new;
2248}
2249
2250
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002251PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252"S.translate(table [,deletechars]) -> string\n\
2253\n\
2254Return a copy of the string S, where all characters occurring\n\
2255in the optional argument deletechars are removed, and the\n\
2256remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002257translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258
2259static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002260string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 register char *input, *output;
2263 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002264 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002266 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002267 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268 PyObject *result;
2269 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002270 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002272 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002275
2276 if (PyString_Check(tableobj)) {
2277 table1 = PyString_AS_STRING(tableobj);
2278 tablen = PyString_GET_SIZE(tableobj);
2279 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002280#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002282 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 parameter; instead a mapping to None will cause characters
2284 to be deleted. */
2285 if (delobj != NULL) {
2286 PyErr_SetString(PyExc_TypeError,
2287 "deletions are implemented differently for unicode");
2288 return NULL;
2289 }
2290 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2291 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002292#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295
Martin v. Löwis00b61272002-12-12 20:03:19 +00002296 if (tablen != 256) {
2297 PyErr_SetString(PyExc_ValueError,
2298 "translation table must be 256 characters long");
2299 return NULL;
2300 }
2301
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302 if (delobj != NULL) {
2303 if (PyString_Check(delobj)) {
2304 del_table = PyString_AS_STRING(delobj);
2305 dellen = PyString_GET_SIZE(delobj);
2306 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002307#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 else if (PyUnicode_Check(delobj)) {
2309 PyErr_SetString(PyExc_TypeError,
2310 "deletions are implemented differently for unicode");
2311 return NULL;
2312 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002313#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2315 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002316 }
2317 else {
2318 del_table = NULL;
2319 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320 }
2321
2322 table = table1;
2323 inlen = PyString_Size(input_obj);
2324 result = PyString_FromStringAndSize((char *)NULL, inlen);
2325 if (result == NULL)
2326 return NULL;
2327 output_start = output = PyString_AsString(result);
2328 input = PyString_AsString(input_obj);
2329
2330 if (dellen == 0) {
2331 /* If no deletions are required, use faster code */
2332 for (i = inlen; --i >= 0; ) {
2333 c = Py_CHARMASK(*input++);
2334 if (Py_CHARMASK((*output++ = table[c])) != c)
2335 changed = 1;
2336 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002337 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338 return result;
2339 Py_DECREF(result);
2340 Py_INCREF(input_obj);
2341 return input_obj;
2342 }
2343
2344 for (i = 0; i < 256; i++)
2345 trans_table[i] = Py_CHARMASK(table[i]);
2346
2347 for (i = 0; i < dellen; i++)
2348 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2349
2350 for (i = inlen; --i >= 0; ) {
2351 c = Py_CHARMASK(*input++);
2352 if (trans_table[c] != -1)
2353 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2354 continue;
2355 changed = 1;
2356 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002357 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002358 Py_DECREF(result);
2359 Py_INCREF(input_obj);
2360 return input_obj;
2361 }
2362 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002363 if (inlen > 0)
2364 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365 return result;
2366}
2367
2368
2369/* What follows is used for implementing replace(). Perry Stoll. */
2370
2371/*
2372 mymemfind
2373
2374 strstr replacement for arbitrary blocks of memory.
2375
Barry Warsaw51ac5802000-03-20 16:36:48 +00002376 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377 contents of memory pointed to by PAT. Returns the index into MEM if
2378 found, or -1 if not found. If len of PAT is greater than length of
2379 MEM, the function returns -1.
2380*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00002381static Py_ssize_t
2382mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002384 register Py_ssize_t ii;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385
2386 /* pattern can not occur in the last pat_len-1 chars */
2387 len -= pat_len;
2388
2389 for (ii = 0; ii <= len; ii++) {
Fred Drake396f6e02000-06-20 15:47:54 +00002390 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002391 return ii;
2392 }
2393 }
2394 return -1;
2395}
2396
2397/*
2398 mymemcnt
2399
2400 Return the number of distinct times PAT is found in MEM.
2401 meaning mem=1111 and pat==11 returns 2.
2402 mem=11111 and pat==11 also return 2.
2403 */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002404static Py_ssize_t
2405mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002407 register Py_ssize_t offset = 0;
2408 Py_ssize_t nfound = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409
2410 while (len >= 0) {
2411 offset = mymemfind(mem, len, pat, pat_len);
2412 if (offset == -1)
2413 break;
2414 mem += offset + pat_len;
2415 len -= offset + pat_len;
2416 nfound++;
2417 }
2418 return nfound;
2419}
2420
2421/*
2422 mymemreplace
2423
Thomas Wouters7e474022000-07-16 12:04:32 +00002424 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002425 replaced with SUB.
2426
Thomas Wouters7e474022000-07-16 12:04:32 +00002427 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002428 of PAT in STR, then the original string is returned. Otherwise, a new
2429 string is allocated here and returned.
2430
2431 on return, out_len is:
2432 the length of output string, or
2433 -1 if the input string is returned, or
2434 unchanged if an error occurs (no memory).
2435
2436 return value is:
2437 the new string allocated locally, or
2438 NULL if an error occurred.
2439*/
2440static char *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002441mymemreplace(const char *str, Py_ssize_t len, /* input string */
2442 const char *pat, Py_ssize_t pat_len, /* pattern string to find */
2443 const char *sub, Py_ssize_t sub_len, /* substitution string */
2444 Py_ssize_t count, /* number of replacements */
2445 Py_ssize_t *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446{
2447 char *out_s;
2448 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002449 Py_ssize_t nfound, offset, new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002450
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002451 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002452 goto return_same;
2453
2454 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002455 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002456 if (count < 0)
2457 count = INT_MAX;
2458 else if (nfound > count)
2459 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002460 if (nfound == 0)
2461 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002462
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002463 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002464 if (new_len == 0) {
2465 /* Have to allocate something for the caller to free(). */
2466 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002467 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002468 return NULL;
2469 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002470 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002471 else {
2472 assert(new_len > 0);
2473 new_s = (char *)PyMem_MALLOC(new_len);
2474 if (new_s == NULL)
2475 return NULL;
2476 out_s = new_s;
2477
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002478 if (pat_len > 0) {
2479 for (; nfound > 0; --nfound) {
2480 /* find index of next instance of pattern */
2481 offset = mymemfind(str, len, pat, pat_len);
2482 if (offset == -1)
2483 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002484
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002485 /* copy non matching part of input string */
2486 memcpy(new_s, str, offset);
2487 str += offset + pat_len;
2488 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002489
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002490 /* copy substitute into the output string */
2491 new_s += offset;
2492 memcpy(new_s, sub, sub_len);
2493 new_s += sub_len;
2494 }
2495 /* copy any remaining values into output string */
2496 if (len > 0)
2497 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002498 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002499 else {
2500 for (;;++str, --len) {
2501 memcpy(new_s, sub, sub_len);
2502 new_s += sub_len;
2503 if (--nfound <= 0) {
2504 memcpy(new_s, str, len);
2505 break;
2506 }
2507 *new_s++ = *str;
2508 }
2509 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002510 }
2511 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002512 return out_s;
2513
2514 return_same:
2515 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002516 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002517}
2518
2519
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002520PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002521"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002522\n\
2523Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002524old replaced by new. If the optional argument count is\n\
2525given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526
2527static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002528string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 const char *str = PyString_AS_STRING(self), *sub, *repl;
2531 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002532 const Py_ssize_t len = PyString_GET_SIZE(self);
2533 Py_ssize_t sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002534 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002535 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002536 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537
Guido van Rossum4c08d552000-03-10 22:55:18 +00002538 if (!PyArg_ParseTuple(args, "OO|i:replace",
2539 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002540 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002541
2542 if (PyString_Check(subobj)) {
2543 sub = PyString_AS_STRING(subobj);
2544 sub_len = PyString_GET_SIZE(subobj);
2545 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002546#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002547 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002548 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002549 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002550#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002551 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2552 return NULL;
2553
2554 if (PyString_Check(replobj)) {
2555 repl = PyString_AS_STRING(replobj);
2556 repl_len = PyString_GET_SIZE(replobj);
2557 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002558#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002559 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002560 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002562#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002563 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2564 return NULL;
2565
Guido van Rossum4c08d552000-03-10 22:55:18 +00002566 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002567 if (new_s == NULL) {
2568 PyErr_NoMemory();
2569 return NULL;
2570 }
2571 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002572 if (PyString_CheckExact(self)) {
2573 /* we're returning another reference to self */
2574 new = (PyObject*)self;
2575 Py_INCREF(new);
2576 }
2577 else {
2578 new = PyString_FromStringAndSize(str, len);
2579 if (new == NULL)
2580 return NULL;
2581 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002582 }
2583 else {
2584 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002585 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002586 }
2587 return new;
2588}
2589
2590
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002591PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002592"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002593\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002594Return True if S starts with the specified prefix, False otherwise.\n\
2595With optional start, test S beginning at that position.\n\
2596With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002597
2598static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002599string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002600{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002601 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002602 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002603 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002604 Py_ssize_t plen;
2605 Py_ssize_t start = 0;
2606 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002607 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002608
Guido van Rossumc6821402000-05-08 14:08:05 +00002609 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2610 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611 return NULL;
2612 if (PyString_Check(subobj)) {
2613 prefix = PyString_AS_STRING(subobj);
2614 plen = PyString_GET_SIZE(subobj);
2615 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002616#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002617 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002618 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002619 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002620 subobj, start, end, -1);
2621 if (rc == -1)
2622 return NULL;
2623 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002624 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002625 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002626#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002627 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002628 return NULL;
2629
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002630 string_adjust_indices(&start, &end, len);
2631
2632 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002633 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002634
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002635 if (end-start >= plen)
2636 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2637 else
2638 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002639}
2640
2641
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002642PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002643"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002644\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002645Return True if S ends with the specified suffix, False otherwise.\n\
2646With optional start, test S beginning at that position.\n\
2647With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002648
2649static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002650string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002651{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002652 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002653 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002654 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002655 Py_ssize_t slen;
2656 Py_ssize_t start = 0;
2657 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002658 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002659
Guido van Rossumc6821402000-05-08 14:08:05 +00002660 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2661 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002662 return NULL;
2663 if (PyString_Check(subobj)) {
2664 suffix = PyString_AS_STRING(subobj);
2665 slen = PyString_GET_SIZE(subobj);
2666 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002667#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002668 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002669 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002670 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002671 subobj, start, end, +1);
2672 if (rc == -1)
2673 return NULL;
2674 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002675 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002676 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002677#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002678 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002679 return NULL;
2680
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002681 string_adjust_indices(&start, &end, len);
2682
2683 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002684 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002685
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002686 if (end-slen > start)
2687 start = end - slen;
2688 if (end-start >= slen)
2689 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2690 else
2691 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002692}
2693
2694
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002695PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002696"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002697\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002698Encodes S using the codec registered for encoding. encoding defaults\n\
2699to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002700handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002701a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2702'xmlcharrefreplace' as well as any other name registered with\n\
2703codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002704
2705static PyObject *
2706string_encode(PyStringObject *self, PyObject *args)
2707{
2708 char *encoding = NULL;
2709 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002710 PyObject *v;
2711
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002712 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2713 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002714 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002715 if (v == NULL)
2716 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002717 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2718 PyErr_Format(PyExc_TypeError,
2719 "encoder did not return a string/unicode object "
2720 "(type=%.400s)",
2721 v->ob_type->tp_name);
2722 Py_DECREF(v);
2723 return NULL;
2724 }
2725 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002726
2727 onError:
2728 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002729}
2730
2731
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002732PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002733"S.decode([encoding[,errors]]) -> object\n\
2734\n\
2735Decodes S using the codec registered for encoding. encoding defaults\n\
2736to the default encoding. errors may be given to set a different error\n\
2737handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002738a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2739as well as any other name registerd with codecs.register_error that is\n\
2740able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002741
2742static PyObject *
2743string_decode(PyStringObject *self, PyObject *args)
2744{
2745 char *encoding = NULL;
2746 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002747 PyObject *v;
2748
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002749 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2750 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002751 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002752 if (v == NULL)
2753 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002754 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2755 PyErr_Format(PyExc_TypeError,
2756 "decoder did not return a string/unicode object "
2757 "(type=%.400s)",
2758 v->ob_type->tp_name);
2759 Py_DECREF(v);
2760 return NULL;
2761 }
2762 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002763
2764 onError:
2765 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002766}
2767
2768
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002769PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002770"S.expandtabs([tabsize]) -> string\n\
2771\n\
2772Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002773If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002774
2775static PyObject*
2776string_expandtabs(PyStringObject *self, PyObject *args)
2777{
2778 const char *e, *p;
2779 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002780 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002781 PyObject *u;
2782 int tabsize = 8;
2783
2784 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2785 return NULL;
2786
Thomas Wouters7e474022000-07-16 12:04:32 +00002787 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002788 i = j = 0;
2789 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2790 for (p = PyString_AS_STRING(self); p < e; p++)
2791 if (*p == '\t') {
2792 if (tabsize > 0)
2793 j += tabsize - (j % tabsize);
2794 }
2795 else {
2796 j++;
2797 if (*p == '\n' || *p == '\r') {
2798 i += j;
2799 j = 0;
2800 }
2801 }
2802
2803 /* Second pass: create output string and fill it */
2804 u = PyString_FromStringAndSize(NULL, i + j);
2805 if (!u)
2806 return NULL;
2807
2808 j = 0;
2809 q = PyString_AS_STRING(u);
2810
2811 for (p = PyString_AS_STRING(self); p < e; p++)
2812 if (*p == '\t') {
2813 if (tabsize > 0) {
2814 i = tabsize - (j % tabsize);
2815 j += i;
2816 while (i--)
2817 *q++ = ' ';
2818 }
2819 }
2820 else {
2821 j++;
2822 *q++ = *p;
2823 if (*p == '\n' || *p == '\r')
2824 j = 0;
2825 }
2826
2827 return u;
2828}
2829
Tim Peters8fa5dd02001-09-12 02:18:30 +00002830static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002831pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002832{
2833 PyObject *u;
2834
2835 if (left < 0)
2836 left = 0;
2837 if (right < 0)
2838 right = 0;
2839
Tim Peters8fa5dd02001-09-12 02:18:30 +00002840 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002841 Py_INCREF(self);
2842 return (PyObject *)self;
2843 }
2844
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002845 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002846 left + PyString_GET_SIZE(self) + right);
2847 if (u) {
2848 if (left)
2849 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002850 memcpy(PyString_AS_STRING(u) + left,
2851 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002852 PyString_GET_SIZE(self));
2853 if (right)
2854 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2855 fill, right);
2856 }
2857
2858 return u;
2859}
2860
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002861PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002862"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002863"\n"
2864"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002865"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002866
2867static PyObject *
2868string_ljust(PyStringObject *self, PyObject *args)
2869{
2870 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002871 char fillchar = ' ';
2872
2873 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002874 return NULL;
2875
Tim Peters8fa5dd02001-09-12 02:18:30 +00002876 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877 Py_INCREF(self);
2878 return (PyObject*) self;
2879 }
2880
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002881 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002882}
2883
2884
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002885PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002886"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002887"\n"
2888"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002889"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002890
2891static PyObject *
2892string_rjust(PyStringObject *self, PyObject *args)
2893{
2894 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002895 char fillchar = ' ';
2896
2897 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002898 return NULL;
2899
Tim Peters8fa5dd02001-09-12 02:18:30 +00002900 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002901 Py_INCREF(self);
2902 return (PyObject*) self;
2903 }
2904
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002905 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002906}
2907
2908
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002909PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002910"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002911"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002912"Return S centered in a string of length width. Padding is\n"
2913"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002914
2915static PyObject *
2916string_center(PyStringObject *self, PyObject *args)
2917{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002918 Py_ssize_t marg, left;
2919 long width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002920 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002921
Martin v. Löwis18e16552006-02-15 17:27:45 +00002922 if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002923 return NULL;
2924
Tim Peters8fa5dd02001-09-12 02:18:30 +00002925 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002926 Py_INCREF(self);
2927 return (PyObject*) self;
2928 }
2929
2930 marg = width - PyString_GET_SIZE(self);
2931 left = marg / 2 + (marg & width & 1);
2932
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002933 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002934}
2935
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002936PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002937"S.zfill(width) -> string\n"
2938"\n"
2939"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002940"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002941
2942static PyObject *
2943string_zfill(PyStringObject *self, PyObject *args)
2944{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002945 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00002946 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002947 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002948
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002949 long width;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002950 if (!PyArg_ParseTuple(args, "l:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00002951 return NULL;
2952
2953 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002954 if (PyString_CheckExact(self)) {
2955 Py_INCREF(self);
2956 return (PyObject*) self;
2957 }
2958 else
2959 return PyString_FromStringAndSize(
2960 PyString_AS_STRING(self),
2961 PyString_GET_SIZE(self)
2962 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002963 }
2964
2965 fill = width - PyString_GET_SIZE(self);
2966
2967 s = pad(self, fill, 0, '0');
2968
2969 if (s == NULL)
2970 return NULL;
2971
2972 p = PyString_AS_STRING(s);
2973 if (p[fill] == '+' || p[fill] == '-') {
2974 /* move sign to beginning of string */
2975 p[0] = p[fill];
2976 p[fill] = '0';
2977 }
2978
2979 return (PyObject*) s;
2980}
2981
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002982PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002983"S.isspace() -> bool\n\
2984\n\
2985Return True if all characters in S are whitespace\n\
2986and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002987
2988static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002989string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002990{
Fred Drakeba096332000-07-09 07:04:36 +00002991 register const unsigned char *p
2992 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002993 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002994
Guido van Rossum4c08d552000-03-10 22:55:18 +00002995 /* Shortcut for single character strings */
2996 if (PyString_GET_SIZE(self) == 1 &&
2997 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002998 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002999
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003000 /* Special case for empty strings */
3001 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003002 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003003
Guido van Rossum4c08d552000-03-10 22:55:18 +00003004 e = p + PyString_GET_SIZE(self);
3005 for (; p < e; p++) {
3006 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003007 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003008 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003009 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003010}
3011
3012
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003013PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003014"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003015\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003016Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003017and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003018
3019static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003020string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003021{
Fred Drakeba096332000-07-09 07:04:36 +00003022 register const unsigned char *p
3023 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003024 register const unsigned char *e;
3025
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003026 /* Shortcut for single character strings */
3027 if (PyString_GET_SIZE(self) == 1 &&
3028 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003029 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003030
3031 /* Special case for empty strings */
3032 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003033 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003034
3035 e = p + PyString_GET_SIZE(self);
3036 for (; p < e; p++) {
3037 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003038 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003039 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003040 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003041}
3042
3043
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003044PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003045"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003046\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003047Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003048and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003049
3050static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003051string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003052{
Fred Drakeba096332000-07-09 07:04:36 +00003053 register const unsigned char *p
3054 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003055 register const unsigned char *e;
3056
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003057 /* Shortcut for single character strings */
3058 if (PyString_GET_SIZE(self) == 1 &&
3059 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003060 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003061
3062 /* Special case for empty strings */
3063 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003064 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003065
3066 e = p + PyString_GET_SIZE(self);
3067 for (; p < e; p++) {
3068 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003069 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003070 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003071 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003072}
3073
3074
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003075PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003076"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003077\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003078Return True if all characters in S are digits\n\
3079and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080
3081static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003082string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003083{
Fred Drakeba096332000-07-09 07:04:36 +00003084 register const unsigned char *p
3085 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003086 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003087
Guido van Rossum4c08d552000-03-10 22:55:18 +00003088 /* Shortcut for single character strings */
3089 if (PyString_GET_SIZE(self) == 1 &&
3090 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003091 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003092
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003093 /* Special case for empty strings */
3094 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003095 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003096
Guido van Rossum4c08d552000-03-10 22:55:18 +00003097 e = p + PyString_GET_SIZE(self);
3098 for (; p < e; p++) {
3099 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003100 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003101 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003102 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003103}
3104
3105
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003106PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003107"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003108\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003109Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003110at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003111
3112static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003113string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114{
Fred Drakeba096332000-07-09 07:04:36 +00003115 register const unsigned char *p
3116 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003117 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003118 int cased;
3119
Guido van Rossum4c08d552000-03-10 22:55:18 +00003120 /* Shortcut for single character strings */
3121 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003122 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003123
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003124 /* Special case for empty strings */
3125 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003126 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003127
Guido van Rossum4c08d552000-03-10 22:55:18 +00003128 e = p + PyString_GET_SIZE(self);
3129 cased = 0;
3130 for (; p < e; p++) {
3131 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003132 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003133 else if (!cased && islower(*p))
3134 cased = 1;
3135 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003136 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003137}
3138
3139
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003140PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003141"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003142\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003143Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003144at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145
3146static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003147string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003148{
Fred Drakeba096332000-07-09 07:04:36 +00003149 register const unsigned char *p
3150 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003151 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003152 int cased;
3153
Guido van Rossum4c08d552000-03-10 22:55:18 +00003154 /* Shortcut for single character strings */
3155 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003156 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003158 /* Special case for empty strings */
3159 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003160 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003161
Guido van Rossum4c08d552000-03-10 22:55:18 +00003162 e = p + PyString_GET_SIZE(self);
3163 cased = 0;
3164 for (; p < e; p++) {
3165 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003166 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003167 else if (!cased && isupper(*p))
3168 cased = 1;
3169 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003170 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003171}
3172
3173
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003174PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003175"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003177Return True if S is a titlecased string and there is at least one\n\
3178character in S, i.e. uppercase characters may only follow uncased\n\
3179characters and lowercase characters only cased ones. Return False\n\
3180otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003181
3182static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003183string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003184{
Fred Drakeba096332000-07-09 07:04:36 +00003185 register const unsigned char *p
3186 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003187 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003188 int cased, previous_is_cased;
3189
Guido van Rossum4c08d552000-03-10 22:55:18 +00003190 /* Shortcut for single character strings */
3191 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003192 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003193
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003194 /* Special case for empty strings */
3195 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003196 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003197
Guido van Rossum4c08d552000-03-10 22:55:18 +00003198 e = p + PyString_GET_SIZE(self);
3199 cased = 0;
3200 previous_is_cased = 0;
3201 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003202 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003203
3204 if (isupper(ch)) {
3205 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003206 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003207 previous_is_cased = 1;
3208 cased = 1;
3209 }
3210 else if (islower(ch)) {
3211 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003212 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003213 previous_is_cased = 1;
3214 cased = 1;
3215 }
3216 else
3217 previous_is_cased = 0;
3218 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003219 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003220}
3221
3222
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003223PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003224"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003225\n\
3226Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003227Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003228is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003229
Guido van Rossum4c08d552000-03-10 22:55:18 +00003230static PyObject*
3231string_splitlines(PyStringObject *self, PyObject *args)
3232{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003233 register Py_ssize_t i;
3234 register Py_ssize_t j;
3235 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003236 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003237 PyObject *list;
3238 PyObject *str;
3239 char *data;
3240
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003241 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003242 return NULL;
3243
3244 data = PyString_AS_STRING(self);
3245 len = PyString_GET_SIZE(self);
3246
Guido van Rossum4c08d552000-03-10 22:55:18 +00003247 list = PyList_New(0);
3248 if (!list)
3249 goto onError;
3250
3251 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003252 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003253
Guido van Rossum4c08d552000-03-10 22:55:18 +00003254 /* Find a line and append it */
3255 while (i < len && data[i] != '\n' && data[i] != '\r')
3256 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003257
3258 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003259 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003260 if (i < len) {
3261 if (data[i] == '\r' && i + 1 < len &&
3262 data[i+1] == '\n')
3263 i += 2;
3264 else
3265 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003266 if (keepends)
3267 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003268 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003269 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003270 j = i;
3271 }
3272 if (j < len) {
3273 SPLIT_APPEND(data, j, len);
3274 }
3275
3276 return list;
3277
3278 onError:
3279 Py_DECREF(list);
3280 return NULL;
3281}
3282
3283#undef SPLIT_APPEND
3284
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003285static PyObject *
3286string_getnewargs(PyStringObject *v)
3287{
3288 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3289}
3290
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003291
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003292static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003293string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003294 /* Counterparts of the obsolete stropmodule functions; except
3295 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003296 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3297 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003298 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003299 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3300 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003301 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3302 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3303 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3304 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3305 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3306 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3307 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003308 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3309 capitalize__doc__},
3310 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3311 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3312 endswith__doc__},
3313 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3314 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3315 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3316 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3317 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3318 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3319 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3320 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3321 startswith__doc__},
3322 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3323 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3324 swapcase__doc__},
3325 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3326 translate__doc__},
3327 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3328 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3329 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3330 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3331 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3332 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3333 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3334 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3335 expandtabs__doc__},
3336 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3337 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003338 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003339 {NULL, NULL} /* sentinel */
3340};
3341
Jeremy Hylton938ace62002-07-17 16:30:39 +00003342static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003343str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3344
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003345static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003346string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003347{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003348 PyObject *x = NULL;
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00003349 static const char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003350
Guido van Rossumae960af2001-08-30 03:11:59 +00003351 if (type != &PyString_Type)
3352 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003353 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3354 return NULL;
3355 if (x == NULL)
3356 return PyString_FromString("");
3357 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003358}
3359
Guido van Rossumae960af2001-08-30 03:11:59 +00003360static PyObject *
3361str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3362{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003363 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003364 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003365
3366 assert(PyType_IsSubtype(type, &PyString_Type));
3367 tmp = string_new(&PyString_Type, args, kwds);
3368 if (tmp == NULL)
3369 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003370 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003371 n = PyString_GET_SIZE(tmp);
3372 pnew = type->tp_alloc(type, n);
3373 if (pnew != NULL) {
3374 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003375 ((PyStringObject *)pnew)->ob_shash =
3376 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003377 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003378 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003379 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003380 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003381}
3382
Guido van Rossumcacfc072002-05-24 19:01:59 +00003383static PyObject *
3384basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3385{
3386 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003387 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003388 return NULL;
3389}
3390
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003391static PyObject *
3392string_mod(PyObject *v, PyObject *w)
3393{
3394 if (!PyString_Check(v)) {
3395 Py_INCREF(Py_NotImplemented);
3396 return Py_NotImplemented;
3397 }
3398 return PyString_Format(v, w);
3399}
3400
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003401PyDoc_STRVAR(basestring_doc,
3402"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003403
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003404static PyNumberMethods string_as_number = {
3405 0, /*nb_add*/
3406 0, /*nb_subtract*/
3407 0, /*nb_multiply*/
3408 0, /*nb_divide*/
3409 string_mod, /*nb_remainder*/
3410};
3411
3412
Guido van Rossumcacfc072002-05-24 19:01:59 +00003413PyTypeObject PyBaseString_Type = {
3414 PyObject_HEAD_INIT(&PyType_Type)
3415 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003416 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003417 0,
3418 0,
3419 0, /* tp_dealloc */
3420 0, /* tp_print */
3421 0, /* tp_getattr */
3422 0, /* tp_setattr */
3423 0, /* tp_compare */
3424 0, /* tp_repr */
3425 0, /* tp_as_number */
3426 0, /* tp_as_sequence */
3427 0, /* tp_as_mapping */
3428 0, /* tp_hash */
3429 0, /* tp_call */
3430 0, /* tp_str */
3431 0, /* tp_getattro */
3432 0, /* tp_setattro */
3433 0, /* tp_as_buffer */
3434 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3435 basestring_doc, /* tp_doc */
3436 0, /* tp_traverse */
3437 0, /* tp_clear */
3438 0, /* tp_richcompare */
3439 0, /* tp_weaklistoffset */
3440 0, /* tp_iter */
3441 0, /* tp_iternext */
3442 0, /* tp_methods */
3443 0, /* tp_members */
3444 0, /* tp_getset */
3445 &PyBaseObject_Type, /* tp_base */
3446 0, /* tp_dict */
3447 0, /* tp_descr_get */
3448 0, /* tp_descr_set */
3449 0, /* tp_dictoffset */
3450 0, /* tp_init */
3451 0, /* tp_alloc */
3452 basestring_new, /* tp_new */
3453 0, /* tp_free */
3454};
3455
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003456PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003457"str(object) -> string\n\
3458\n\
3459Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003460If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003461
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003462PyTypeObject PyString_Type = {
3463 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003464 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003465 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003466 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003467 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003468 (destructor)string_dealloc, /* tp_dealloc */
3469 (printfunc)string_print, /* tp_print */
3470 0, /* tp_getattr */
3471 0, /* tp_setattr */
3472 0, /* tp_compare */
3473 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003474 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003475 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003476 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003477 (hashfunc)string_hash, /* tp_hash */
3478 0, /* tp_call */
3479 (reprfunc)string_str, /* tp_str */
3480 PyObject_GenericGetAttr, /* tp_getattro */
3481 0, /* tp_setattro */
3482 &string_as_buffer, /* tp_as_buffer */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003483 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3484 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003485 string_doc, /* tp_doc */
3486 0, /* tp_traverse */
3487 0, /* tp_clear */
3488 (richcmpfunc)string_richcompare, /* tp_richcompare */
3489 0, /* tp_weaklistoffset */
3490 0, /* tp_iter */
3491 0, /* tp_iternext */
3492 string_methods, /* tp_methods */
3493 0, /* tp_members */
3494 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003495 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003496 0, /* tp_dict */
3497 0, /* tp_descr_get */
3498 0, /* tp_descr_set */
3499 0, /* tp_dictoffset */
3500 0, /* tp_init */
3501 0, /* tp_alloc */
3502 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003503 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003504};
3505
3506void
Fred Drakeba096332000-07-09 07:04:36 +00003507PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003508{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003509 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003510 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003511 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003512 if (w == NULL || !PyString_Check(*pv)) {
3513 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003514 *pv = NULL;
3515 return;
3516 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003517 v = string_concat((PyStringObject *) *pv, w);
3518 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003519 *pv = v;
3520}
3521
Guido van Rossum013142a1994-08-30 08:19:36 +00003522void
Fred Drakeba096332000-07-09 07:04:36 +00003523PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003524{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003525 PyString_Concat(pv, w);
3526 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003527}
3528
3529
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003530/* The following function breaks the notion that strings are immutable:
3531 it changes the size of a string. We get away with this only if there
3532 is only one module referencing the object. You can also think of it
3533 as creating a new string object and destroying the old one, only
3534 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003535 already be known to some other part of the code...
3536 Note that if there's not enough memory to resize the string, the original
3537 string object at *pv is deallocated, *pv is set to NULL, an "out of
3538 memory" exception is set, and -1 is returned. Else (on success) 0 is
3539 returned, and the value in *pv may or may not be the same as on input.
3540 As always, an extra byte is allocated for a trailing \0 byte (newsize
3541 does *not* include that), and a trailing \0 byte is stored.
3542*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003543
3544int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003545_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003546{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003547 register PyObject *v;
3548 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003549 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003550 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3551 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003552 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003553 Py_DECREF(v);
3554 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003555 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003556 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003557 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003558 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003559 _Py_ForgetReference(v);
3560 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003561 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003562 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003563 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003564 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003565 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003566 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003567 _Py_NewReference(*pv);
3568 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003569 sv->ob_size = newsize;
3570 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003571 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003572 return 0;
3573}
Guido van Rossume5372401993-03-16 12:15:04 +00003574
3575/* Helpers for formatstring */
3576
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003577static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00003578getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003579{
Thomas Wouters977485d2006-02-16 15:59:12 +00003580 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003581 if (argidx < arglen) {
3582 (*p_argidx)++;
3583 if (arglen < 0)
3584 return args;
3585 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003586 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003587 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003588 PyErr_SetString(PyExc_TypeError,
3589 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003590 return NULL;
3591}
3592
/* Bit flags recorded while parsing the flag characters of a conversion
 * specifier.  Each mask corresponds to one flag character.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
3605
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003606static int
Fred Drakeba096332000-07-09 07:04:36 +00003607formatfloat(char *buf, size_t buflen, int flags,
3608 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003609{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003610 /* fmt = '%#.' + `prec` + `type`
3611 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003612 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003613 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003614 x = PyFloat_AsDouble(v);
3615 if (x == -1.0 && PyErr_Occurred()) {
3616 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003617 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003618 }
Guido van Rossume5372401993-03-16 12:15:04 +00003619 if (prec < 0)
3620 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003621 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3622 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003623 /* Worst case length calc to ensure no buffer overrun:
3624
3625 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003626 fmt = %#.<prec>g
3627 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003628 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003629 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003630
3631 'f' formats:
3632 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3633 len = 1 + 50 + 1 + prec = 52 + prec
3634
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003635 If prec=0 the effective precision is 1 (the leading digit is
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003636 always given), therefore increase the length by one.
3637
3638 */
3639 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3640 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003641 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003642 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003643 return -1;
3644 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003645 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3646 (flags&F_ALT) ? "#" : "",
3647 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003648 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003649 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003650}
3651
Tim Peters38fd5b62000-09-21 05:43:11 +00003652/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3653 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3654 * Python's regular ints.
3655 * Return value: a new PyString*, or NULL if error.
3656 * . *pbuf is set to point into it,
3657 * *plen set to the # of chars following that.
3658 * Caller must decref it when done using pbuf.
3659 * The string starting at *pbuf is of the form
3660 * "-"? ("0x" | "0X")? digit+
3661 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003662 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003663 * There will be at least prec digits, zero-filled on the left if
3664 * necessary to get that many.
3665 * val object to be converted
3666 * flags bitmask of format flags; only F_ALT is looked at
3667 * prec minimum number of digits; 0-fill on left if needed
3668 * type a character in [duoxX]; u acts the same as d
3669 *
3670 * CAUTION: o, x and X conversions on regular ints can never
3671 * produce a '-' sign, but can for Python's unbounded ints.
3672 */
3673PyObject*
3674_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3675 char **pbuf, int *plen)
3676{
3677 PyObject *result = NULL;
3678 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003679 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003680 int sign; /* 1 if '-', else 0 */
3681 int len; /* number of characters */
3682 int numdigits; /* len == numnondigits + numdigits */
3683 int numnondigits = 0;
3684
3685 switch (type) {
3686 case 'd':
3687 case 'u':
3688 result = val->ob_type->tp_str(val);
3689 break;
3690 case 'o':
3691 result = val->ob_type->tp_as_number->nb_oct(val);
3692 break;
3693 case 'x':
3694 case 'X':
3695 numnondigits = 2;
3696 result = val->ob_type->tp_as_number->nb_hex(val);
3697 break;
3698 default:
3699 assert(!"'type' not in [duoxX]");
3700 }
3701 if (!result)
3702 return NULL;
3703
3704 /* To modify the string in-place, there can only be one reference. */
3705 if (result->ob_refcnt != 1) {
3706 PyErr_BadInternalCall();
3707 return NULL;
3708 }
3709 buf = PyString_AsString(result);
3710 len = PyString_Size(result);
3711 if (buf[len-1] == 'L') {
3712 --len;
3713 buf[len] = '\0';
3714 }
3715 sign = buf[0] == '-';
3716 numnondigits += sign;
3717 numdigits = len - numnondigits;
3718 assert(numdigits > 0);
3719
Tim Petersfff53252001-04-12 18:38:48 +00003720 /* Get rid of base marker unless F_ALT */
3721 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003722 /* Need to skip 0x, 0X or 0. */
3723 int skipped = 0;
3724 switch (type) {
3725 case 'o':
3726 assert(buf[sign] == '0');
3727 /* If 0 is only digit, leave it alone. */
3728 if (numdigits > 1) {
3729 skipped = 1;
3730 --numdigits;
3731 }
3732 break;
3733 case 'x':
3734 case 'X':
3735 assert(buf[sign] == '0');
3736 assert(buf[sign + 1] == 'x');
3737 skipped = 2;
3738 numnondigits -= 2;
3739 break;
3740 }
3741 if (skipped) {
3742 buf += skipped;
3743 len -= skipped;
3744 if (sign)
3745 buf[0] = '-';
3746 }
3747 assert(len == numnondigits + numdigits);
3748 assert(numdigits > 0);
3749 }
3750
3751 /* Fill with leading zeroes to meet minimum width. */
3752 if (prec > numdigits) {
3753 PyObject *r1 = PyString_FromStringAndSize(NULL,
3754 numnondigits + prec);
3755 char *b1;
3756 if (!r1) {
3757 Py_DECREF(result);
3758 return NULL;
3759 }
3760 b1 = PyString_AS_STRING(r1);
3761 for (i = 0; i < numnondigits; ++i)
3762 *b1++ = *buf++;
3763 for (i = 0; i < prec - numdigits; i++)
3764 *b1++ = '0';
3765 for (i = 0; i < numdigits; i++)
3766 *b1++ = *buf++;
3767 *b1 = '\0';
3768 Py_DECREF(result);
3769 result = r1;
3770 buf = PyString_AS_STRING(result);
3771 len = numnondigits + prec;
3772 }
3773
3774 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003775 if (type == 'X') {
3776 /* Need to convert all lower case letters to upper case.
3777 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003778 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003779 if (buf[i] >= 'a' && buf[i] <= 'x')
3780 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003781 }
3782 *pbuf = buf;
3783 *plen = len;
3784 return result;
3785}
3786
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003787static int
Fred Drakeba096332000-07-09 07:04:36 +00003788formatint(char *buf, size_t buflen, int flags,
3789 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003790{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003791 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003792 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3793 + 1 + 1 = 24 */
3794 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003795 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003796 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003797
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003798 x = PyInt_AsLong(v);
3799 if (x == -1 && PyErr_Occurred()) {
3800 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003801 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003802 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003803 if (x < 0 && type == 'u') {
3804 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003805 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003806 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3807 sign = "-";
3808 else
3809 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003810 if (prec < 0)
3811 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003812
3813 if ((flags & F_ALT) &&
3814 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003815 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003816 * of issues that cause pain:
3817 * - when 0 is being converted, the C standard leaves off
3818 * the '0x' or '0X', which is inconsistent with other
3819 * %#x/%#X conversions and inconsistent with Python's
3820 * hex() function
3821 * - there are platforms that violate the standard and
3822 * convert 0 with the '0x' or '0X'
3823 * (Metrowerks, Compaq Tru64)
3824 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003825 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003826 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003827 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003828 * We can achieve the desired consistency by inserting our
3829 * own '0x' or '0X' prefix, and substituting %x/%X in place
3830 * of %#x/%#X.
3831 *
3832 * Note that this is the same approach as used in
3833 * formatint() in unicodeobject.c
3834 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003835 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3836 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003837 }
3838 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003839 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3840 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003841 prec, type);
3842 }
3843
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003844 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3845 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003846 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003847 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003848 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003849 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003850 return -1;
3851 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003852 if (sign[0])
3853 PyOS_snprintf(buf, buflen, fmt, -x);
3854 else
3855 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003856 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003857}
3858
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003859static int
Fred Drakeba096332000-07-09 07:04:36 +00003860formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003861{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003862 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003863 if (PyString_Check(v)) {
3864 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003865 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003866 }
3867 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003868 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003869 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003870 }
3871 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003872 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003873}
3874
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. under sizeof or next to a postfix
   operator).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003884
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003885PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003886PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003887{
3888 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003889 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003890 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003891 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003892 PyObject *result, *orig_args;
3893#ifdef Py_USING_UNICODE
3894 PyObject *v, *w;
3895#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003896 PyObject *dict = NULL;
3897 if (format == NULL || !PyString_Check(format) || args == NULL) {
3898 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003899 return NULL;
3900 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003901 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003902 fmt = PyString_AS_STRING(format);
3903 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003904 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003905 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003906 if (result == NULL)
3907 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003908 res = PyString_AsString(result);
3909 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003910 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003911 argidx = 0;
3912 }
3913 else {
3914 arglen = -1;
3915 argidx = -2;
3916 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003917 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3918 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003919 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003920 while (--fmtcnt >= 0) {
3921 if (*fmt != '%') {
3922 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003923 rescnt = fmtcnt + 100;
3924 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003925 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003926 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003927 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003928 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003929 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003930 }
3931 *res++ = *fmt++;
3932 }
3933 else {
3934 /* Got a format specifier */
3935 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003936 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003937 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003938 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003939 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003940 PyObject *v = NULL;
3941 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003942 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003943 int sign;
3944 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003945 char formatbuf[FORMATBUFLEN];
3946 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003947#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003948 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003949 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003950#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003951
Guido van Rossumda9c2711996-12-05 21:58:58 +00003952 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003953 if (*fmt == '(') {
3954 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003955 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003956 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003957 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003958
3959 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003960 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003961 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003962 goto error;
3963 }
3964 ++fmt;
3965 --fmtcnt;
3966 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003967 /* Skip over balanced parentheses */
3968 while (pcount > 0 && --fmtcnt >= 0) {
3969 if (*fmt == ')')
3970 --pcount;
3971 else if (*fmt == '(')
3972 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003973 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003974 }
3975 keylen = fmt - keystart - 1;
3976 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003977 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003978 "incomplete format key");
3979 goto error;
3980 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003981 key = PyString_FromStringAndSize(keystart,
3982 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003983 if (key == NULL)
3984 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003985 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003986 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003987 args_owned = 0;
3988 }
3989 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003990 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003991 if (args == NULL) {
3992 goto error;
3993 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003994 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003995 arglen = -1;
3996 argidx = -2;
3997 }
Guido van Rossume5372401993-03-16 12:15:04 +00003998 while (--fmtcnt >= 0) {
3999 switch (c = *fmt++) {
4000 case '-': flags |= F_LJUST; continue;
4001 case '+': flags |= F_SIGN; continue;
4002 case ' ': flags |= F_BLANK; continue;
4003 case '#': flags |= F_ALT; continue;
4004 case '0': flags |= F_ZERO; continue;
4005 }
4006 break;
4007 }
4008 if (c == '*') {
4009 v = getnextarg(args, arglen, &argidx);
4010 if (v == NULL)
4011 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004012 if (!PyInt_Check(v)) {
4013 PyErr_SetString(PyExc_TypeError,
4014 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004015 goto error;
4016 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004017 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004018 if (width < 0) {
4019 flags |= F_LJUST;
4020 width = -width;
4021 }
Guido van Rossume5372401993-03-16 12:15:04 +00004022 if (--fmtcnt >= 0)
4023 c = *fmt++;
4024 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004025 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004026 width = c - '0';
4027 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004028 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004029 if (!isdigit(c))
4030 break;
4031 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004032 PyErr_SetString(
4033 PyExc_ValueError,
4034 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004035 goto error;
4036 }
4037 width = width*10 + (c - '0');
4038 }
4039 }
4040 if (c == '.') {
4041 prec = 0;
4042 if (--fmtcnt >= 0)
4043 c = *fmt++;
4044 if (c == '*') {
4045 v = getnextarg(args, arglen, &argidx);
4046 if (v == NULL)
4047 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004048 if (!PyInt_Check(v)) {
4049 PyErr_SetString(
4050 PyExc_TypeError,
4051 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004052 goto error;
4053 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004054 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004055 if (prec < 0)
4056 prec = 0;
4057 if (--fmtcnt >= 0)
4058 c = *fmt++;
4059 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004060 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004061 prec = c - '0';
4062 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004063 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004064 if (!isdigit(c))
4065 break;
4066 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004067 PyErr_SetString(
4068 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004069 "prec too big");
4070 goto error;
4071 }
4072 prec = prec*10 + (c - '0');
4073 }
4074 }
4075 } /* prec */
4076 if (fmtcnt >= 0) {
4077 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004078 if (--fmtcnt >= 0)
4079 c = *fmt++;
4080 }
4081 }
4082 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004083 PyErr_SetString(PyExc_ValueError,
4084 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004085 goto error;
4086 }
4087 if (c != '%') {
4088 v = getnextarg(args, arglen, &argidx);
4089 if (v == NULL)
4090 goto error;
4091 }
4092 sign = 0;
4093 fill = ' ';
4094 switch (c) {
4095 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004096 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004097 len = 1;
4098 break;
4099 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004100#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004101 if (PyUnicode_Check(v)) {
4102 fmt = fmt_start;
4103 argidx = argidx_start;
4104 goto unicode;
4105 }
Georg Brandld45014b2005-10-01 17:06:00 +00004106#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004107 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004108#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004109 if (temp != NULL && PyUnicode_Check(temp)) {
4110 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004111 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004112 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004113 goto unicode;
4114 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004115#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004116 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004117 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004118 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004119 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004120 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004121 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004122 if (!PyString_Check(temp)) {
4123 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004124 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004125 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004126 goto error;
4127 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004128 pbuf = PyString_AS_STRING(temp);
4129 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004130 if (prec >= 0 && len > prec)
4131 len = prec;
4132 break;
4133 case 'i':
4134 case 'd':
4135 case 'u':
4136 case 'o':
4137 case 'x':
4138 case 'X':
4139 if (c == 'i')
4140 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004141 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004142 temp = _PyString_FormatLong(v, flags,
4143 prec, c, &pbuf, &len);
4144 if (!temp)
4145 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004146 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004147 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004148 else {
4149 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004150 len = formatint(pbuf,
4151 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004152 flags, prec, c, v);
4153 if (len < 0)
4154 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004155 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004156 }
4157 if (flags & F_ZERO)
4158 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004159 break;
4160 case 'e':
4161 case 'E':
4162 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004163 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004164 case 'g':
4165 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004166 if (c == 'F')
4167 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004168 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004169 len = formatfloat(pbuf, sizeof(formatbuf),
4170 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004171 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004172 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004173 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004174 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004175 fill = '0';
4176 break;
4177 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004178#ifdef Py_USING_UNICODE
4179 if (PyUnicode_Check(v)) {
4180 fmt = fmt_start;
4181 argidx = argidx_start;
4182 goto unicode;
4183 }
4184#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004185 pbuf = formatbuf;
4186 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004187 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004188 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004189 break;
4190 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004191 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004192 "unsupported format character '%c' (0x%x) "
4193 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004194 c, c,
4195 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004196 goto error;
4197 }
4198 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004199 if (*pbuf == '-' || *pbuf == '+') {
4200 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004201 len--;
4202 }
4203 else if (flags & F_SIGN)
4204 sign = '+';
4205 else if (flags & F_BLANK)
4206 sign = ' ';
4207 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004208 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004209 }
4210 if (width < len)
4211 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004212 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004213 reslen -= rescnt;
4214 rescnt = width + fmtcnt + 100;
4215 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004216 if (reslen < 0) {
4217 Py_DECREF(result);
4218 return PyErr_NoMemory();
4219 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004220 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004221 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004222 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004223 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004224 }
4225 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004226 if (fill != ' ')
4227 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004228 rescnt--;
4229 if (width > len)
4230 width--;
4231 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004232 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4233 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004234 assert(pbuf[1] == c);
4235 if (fill != ' ') {
4236 *res++ = *pbuf++;
4237 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004238 }
Tim Petersfff53252001-04-12 18:38:48 +00004239 rescnt -= 2;
4240 width -= 2;
4241 if (width < 0)
4242 width = 0;
4243 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004244 }
4245 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004246 do {
4247 --rescnt;
4248 *res++ = fill;
4249 } while (--width > len);
4250 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004251 if (fill == ' ') {
4252 if (sign)
4253 *res++ = sign;
4254 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004255 (c == 'x' || c == 'X')) {
4256 assert(pbuf[0] == '0');
4257 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004258 *res++ = *pbuf++;
4259 *res++ = *pbuf++;
4260 }
4261 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004262 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004263 res += len;
4264 rescnt -= len;
4265 while (--width >= len) {
4266 --rescnt;
4267 *res++ = ' ';
4268 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004269 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004270 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004271 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004272 goto error;
4273 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004274 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004275 } /* '%' */
4276 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004277 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004278 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004279 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004280 goto error;
4281 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004282 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004283 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004284 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004285 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004286 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004287
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004288#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004289 unicode:
4290 if (args_owned) {
4291 Py_DECREF(args);
4292 args_owned = 0;
4293 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004294 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004295 if (PyTuple_Check(orig_args) && argidx > 0) {
4296 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004297 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004298 v = PyTuple_New(n);
4299 if (v == NULL)
4300 goto error;
4301 while (--n >= 0) {
4302 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4303 Py_INCREF(w);
4304 PyTuple_SET_ITEM(v, n, w);
4305 }
4306 args = v;
4307 } else {
4308 Py_INCREF(orig_args);
4309 args = orig_args;
4310 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004311 args_owned = 1;
4312 /* Take what we have of the result and let the Unicode formatting
4313 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004314 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004315 if (_PyString_Resize(&result, rescnt))
4316 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004317 fmtcnt = PyString_GET_SIZE(format) - \
4318 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004319 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4320 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004321 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004322 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004323 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004324 if (v == NULL)
4325 goto error;
4326 /* Paste what we have (result) to what the Unicode formatting
4327 function returned (v) and return the result (or error) */
4328 w = PyUnicode_Concat(result, v);
4329 Py_DECREF(result);
4330 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004331 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004332 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004333#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004334
Guido van Rossume5372401993-03-16 12:15:04 +00004335 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004336 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004337 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004338 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004339 }
Guido van Rossume5372401993-03-16 12:15:04 +00004340 return NULL;
4341}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004342
Guido van Rossum2a61e741997-01-18 07:55:05 +00004343void
Fred Drakeba096332000-07-09 07:04:36 +00004344PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004345{
4346 register PyStringObject *s = (PyStringObject *)(*p);
4347 PyObject *t;
4348 if (s == NULL || !PyString_Check(s))
4349 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004350 /* If it's a string subclass, we don't really know what putting
4351 it in the interned dict might do. */
4352 if (!PyString_CheckExact(s))
4353 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004354 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004355 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004356 if (interned == NULL) {
4357 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004358 if (interned == NULL) {
4359 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004360 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004361 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004362 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004363 t = PyDict_GetItem(interned, (PyObject *)s);
4364 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004365 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004366 Py_DECREF(*p);
4367 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004368 return;
4369 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004370
Armin Rigo79f7ad22004-08-07 19:27:39 +00004371 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004372 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004373 return;
4374 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004375 /* The two references in interned are not counted by refcnt.
4376 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004377 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004378 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004379}
4380
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004381void
4382PyString_InternImmortal(PyObject **p)
4383{
4384 PyString_InternInPlace(p);
4385 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4386 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4387 Py_INCREF(*p);
4388 }
4389}
4390
Guido van Rossum2a61e741997-01-18 07:55:05 +00004391
4392PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004393PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004394{
4395 PyObject *s = PyString_FromString(cp);
4396 if (s == NULL)
4397 return NULL;
4398 PyString_InternInPlace(&s);
4399 return s;
4400}
4401
Guido van Rossum8cf04761997-08-02 02:57:45 +00004402void
Fred Drakeba096332000-07-09 07:04:36 +00004403PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004404{
4405 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004406 for (i = 0; i < UCHAR_MAX + 1; i++) {
4407 Py_XDECREF(characters[i]);
4408 characters[i] = NULL;
4409 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004410 Py_XDECREF(nullstring);
4411 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004412}
Barry Warsawa903ad982001-02-23 16:40:48 +00004413
Barry Warsawa903ad982001-02-23 16:40:48 +00004414void _Py_ReleaseInternedStrings(void)
4415{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004416 PyObject *keys;
4417 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004418 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004419
4420 if (interned == NULL || !PyDict_Check(interned))
4421 return;
4422 keys = PyDict_Keys(interned);
4423 if (keys == NULL || !PyList_Check(keys)) {
4424 PyErr_Clear();
4425 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004426 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004427
4428 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4429 detector, interned strings are not forcibly deallocated; rather, we
4430 give them their stolen references back, and then clear and DECREF
4431 the interned dict. */
4432
4433 fprintf(stderr, "releasing interned strings\n");
4434 n = PyList_GET_SIZE(keys);
4435 for (i = 0; i < n; i++) {
4436 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4437 switch (s->ob_sstate) {
4438 case SSTATE_NOT_INTERNED:
4439 /* XXX Shouldn't happen */
4440 break;
4441 case SSTATE_INTERNED_IMMORTAL:
4442 s->ob_refcnt += 1;
4443 break;
4444 case SSTATE_INTERNED_MORTAL:
4445 s->ob_refcnt += 2;
4446 break;
4447 default:
4448 Py_FatalError("Inconsistent interned string state.");
4449 }
4450 s->ob_sstate = SSTATE_NOT_INTERNED;
4451 }
4452 Py_DECREF(keys);
4453 PyDict_Clear(interned);
4454 Py_DECREF(interned);
4455 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004456}