blob: d23c97332ed8b0981e7390e6990b28f3c44d2ae2 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
Tim Petersae1d0c92006-03-17 03:29:34 +000019 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000020 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000025/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000028 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000029
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000033 For PyString_FromStringAndSize(), the parameter the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000043
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000044 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
47 is therefore equal to the equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000050*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000157 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000184 /* likewise for %zd */
185 if (*f == 'z' && *(f+1) == 'd')
Tim Petersae1d0c92006-03-17 03:29:34 +0000186 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000187
Barry Warsawdadace02001-08-24 18:32:06 +0000188 switch (*f) {
189 case 'c':
190 (void)va_arg(count, int);
191 /* fall through... */
192 case '%':
193 n++;
194 break;
195 case 'd': case 'i': case 'x':
196 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000197 /* 20 bytes is enough to hold a 64-bit
198 integer. Decimal takes the most space.
199 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000200 n += 20;
201 break;
202 case 's':
203 s = va_arg(count, char*);
204 n += strlen(s);
205 break;
206 case 'p':
207 (void) va_arg(count, int);
208 /* maximum 64-bit pointer representation:
209 * 0xffffffffffffffff
210 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000211 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000212 */
213 n += 19;
214 break;
215 default:
216 /* if we stumble upon an unknown
217 formatting code, copy the rest of
218 the format string to the output
219 string. (we cannot just skip the
220 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000221 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000222 n += strlen(p);
223 goto expand;
224 }
225 } else
226 n++;
227 }
228 expand:
229 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000230 /* Since we've analyzed how much space we need for the worst case,
231 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 string = PyString_FromStringAndSize(NULL, n);
233 if (!string)
234 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000235
Barry Warsawdadace02001-08-24 18:32:06 +0000236 s = PyString_AsString(string);
237
238 for (f = format; *f; f++) {
239 if (*f == '%') {
240 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 Py_ssize_t i;
242 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000243 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 /* handle the size_t flag. */
264 if (*f == 'z' && *(f+1) == 'd') {
265 size_tflag = 1;
266 ++f;
267 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000268
Barry Warsawdadace02001-08-24 18:32:06 +0000269 switch (*f) {
270 case 'c':
271 *s++ = va_arg(vargs, int);
272 break;
273 case 'd':
274 if (longflag)
275 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000276 else if (size_tflag)
277 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
278 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000279 else
280 sprintf(s, "%d", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 'i':
284 sprintf(s, "%i", va_arg(vargs, int));
285 s += strlen(s);
286 break;
287 case 'x':
288 sprintf(s, "%x", va_arg(vargs, int));
289 s += strlen(s);
290 break;
291 case 's':
292 p = va_arg(vargs, char*);
293 i = strlen(p);
294 if (n > 0 && i > n)
295 i = n;
296 memcpy(s, p, i);
297 s += i;
298 break;
299 case 'p':
300 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000301 /* %p is ill-defined: ensure leading 0x. */
302 if (s[1] == 'X')
303 s[1] = 'x';
304 else if (s[1] != 'x') {
305 memmove(s+2, s, strlen(s)+1);
306 s[0] = '0';
307 s[1] = 'x';
308 }
Barry Warsawdadace02001-08-24 18:32:06 +0000309 s += strlen(s);
310 break;
311 case '%':
312 *s++ = '%';
313 break;
314 default:
315 strcpy(s, p);
316 s += strlen(s);
317 goto end;
318 }
319 } else
320 *s++ = *f;
321 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000322
Barry Warsawdadace02001-08-24 18:32:06 +0000323 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000324 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000325 return string;
326}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000327
Barry Warsawdadace02001-08-24 18:32:06 +0000328PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000330{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000332 va_list vargs;
333
334#ifdef HAVE_STDARG_PROTOTYPES
335 va_start(vargs, format);
336#else
337 va_start(vargs);
338#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000339 ret = PyString_FromFormatV(format, vargs);
340 va_end(vargs);
341 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342}
343
344
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000345PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000346 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000347 const char *encoding,
348 const char *errors)
349{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000350 PyObject *v, *str;
351
352 str = PyString_FromStringAndSize(s, size);
353 if (str == NULL)
354 return NULL;
355 v = PyString_AsDecodedString(str, encoding, errors);
356 Py_DECREF(str);
357 return v;
358}
359
360PyObject *PyString_AsDecodedObject(PyObject *str,
361 const char *encoding,
362 const char *errors)
363{
364 PyObject *v;
365
366 if (!PyString_Check(str)) {
367 PyErr_BadArgument();
368 goto onError;
369 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000370
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000371 if (encoding == NULL) {
372#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000373 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000374#else
375 PyErr_SetString(PyExc_ValueError, "no encoding specified");
376 goto onError;
377#endif
378 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379
380 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000381 v = PyCodec_Decode(str, encoding, errors);
382 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000384
385 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000386
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000387 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 return NULL;
389}
390
391PyObject *PyString_AsDecodedString(PyObject *str,
392 const char *encoding,
393 const char *errors)
394{
395 PyObject *v;
396
397 v = PyString_AsDecodedObject(str, encoding, errors);
398 if (v == NULL)
399 goto onError;
400
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000401#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000402 /* Convert Unicode to a string using the default encoding */
403 if (PyUnicode_Check(v)) {
404 PyObject *temp = v;
405 v = PyUnicode_AsEncodedString(v, NULL, NULL);
406 Py_DECREF(temp);
407 if (v == NULL)
408 goto onError;
409 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000410#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411 if (!PyString_Check(v)) {
412 PyErr_Format(PyExc_TypeError,
413 "decoder did not return a string object (type=%.400s)",
414 v->ob_type->tp_name);
415 Py_DECREF(v);
416 goto onError;
417 }
418
419 return v;
420
421 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000422 return NULL;
423}
424
425PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000426 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 const char *encoding,
428 const char *errors)
429{
430 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000431
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 str = PyString_FromStringAndSize(s, size);
433 if (str == NULL)
434 return NULL;
435 v = PyString_AsEncodedString(str, encoding, errors);
436 Py_DECREF(str);
437 return v;
438}
439
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000440PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 const char *encoding,
442 const char *errors)
443{
444 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000445
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446 if (!PyString_Check(str)) {
447 PyErr_BadArgument();
448 goto onError;
449 }
450
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000451 if (encoding == NULL) {
452#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454#else
455 PyErr_SetString(PyExc_ValueError, "no encoding specified");
456 goto onError;
457#endif
458 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459
460 /* Encode via the codec registry */
461 v = PyCodec_Encode(str, encoding, errors);
462 if (v == NULL)
463 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000464
465 return v;
466
467 onError:
468 return NULL;
469}
470
471PyObject *PyString_AsEncodedString(PyObject *str,
472 const char *encoding,
473 const char *errors)
474{
475 PyObject *v;
476
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000477 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000478 if (v == NULL)
479 goto onError;
480
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000481#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000482 /* Convert Unicode to a string using the default encoding */
483 if (PyUnicode_Check(v)) {
484 PyObject *temp = v;
485 v = PyUnicode_AsEncodedString(v, NULL, NULL);
486 Py_DECREF(temp);
487 if (v == NULL)
488 goto onError;
489 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000490#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 if (!PyString_Check(v)) {
492 PyErr_Format(PyExc_TypeError,
493 "encoder did not return a string object (type=%.400s)",
494 v->ob_type->tp_name);
495 Py_DECREF(v);
496 goto onError;
497 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000498
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000499 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000500
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 onError:
502 return NULL;
503}
504
Guido van Rossum234f9421993-06-17 12:35:49 +0000505static void
Fred Drakeba096332000-07-09 07:04:36 +0000506string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000507{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000508 switch (PyString_CHECK_INTERNED(op)) {
509 case SSTATE_NOT_INTERNED:
510 break;
511
512 case SSTATE_INTERNED_MORTAL:
513 /* revive dead object temporarily for DelItem */
514 op->ob_refcnt = 3;
515 if (PyDict_DelItem(interned, op) != 0)
516 Py_FatalError(
517 "deletion of interned string failed");
518 break;
519
520 case SSTATE_INTERNED_IMMORTAL:
521 Py_FatalError("Immortal interned string died.");
522
523 default:
524 Py_FatalError("Inconsistent interned string state.");
525 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000526 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000527}
528
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000529/* Unescape a backslash-escaped string. If unicode is non-zero,
530 the string is a u-literal. If recode_encoding is non-zero,
531 the string is UTF-8 encoded and should be re-encoded in the
532 specified encoding. */
533
534PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000535 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000536 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000537 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 const char *recode_encoding)
539{
540 int c;
541 char *p, *buf;
542 const char *end;
543 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000544 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000545 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 if (v == NULL)
547 return NULL;
548 p = buf = PyString_AsString(v);
549 end = s + len;
550 while (s < end) {
551 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000552 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000553#ifdef Py_USING_UNICODE
554 if (recode_encoding && (*s & 0x80)) {
555 PyObject *u, *w;
556 char *r;
557 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000558 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000559 t = s;
560 /* Decode non-ASCII bytes as UTF-8. */
561 while (t < end && (*t & 0x80)) t++;
562 u = PyUnicode_DecodeUTF8(s, t - s, errors);
563 if(!u) goto failed;
564
565 /* Recode them in target encoding. */
566 w = PyUnicode_AsEncodedString(
567 u, recode_encoding, errors);
568 Py_DECREF(u);
569 if (!w) goto failed;
570
571 /* Append bytes to output buffer. */
572 r = PyString_AsString(w);
573 rn = PyString_Size(w);
574 memcpy(p, r, rn);
575 p += rn;
576 Py_DECREF(w);
577 s = t;
578 } else {
579 *p++ = *s++;
580 }
581#else
582 *p++ = *s++;
583#endif
584 continue;
585 }
586 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000587 if (s==end) {
588 PyErr_SetString(PyExc_ValueError,
589 "Trailing \\ in string");
590 goto failed;
591 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000592 switch (*s++) {
593 /* XXX This assumes ASCII! */
594 case '\n': break;
595 case '\\': *p++ = '\\'; break;
596 case '\'': *p++ = '\''; break;
597 case '\"': *p++ = '\"'; break;
598 case 'b': *p++ = '\b'; break;
599 case 'f': *p++ = '\014'; break; /* FF */
600 case 't': *p++ = '\t'; break;
601 case 'n': *p++ = '\n'; break;
602 case 'r': *p++ = '\r'; break;
603 case 'v': *p++ = '\013'; break; /* VT */
604 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
605 case '0': case '1': case '2': case '3':
606 case '4': case '5': case '6': case '7':
607 c = s[-1] - '0';
608 if ('0' <= *s && *s <= '7') {
609 c = (c<<3) + *s++ - '0';
610 if ('0' <= *s && *s <= '7')
611 c = (c<<3) + *s++ - '0';
612 }
613 *p++ = c;
614 break;
615 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000616 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000617 && isxdigit(Py_CHARMASK(s[1]))) {
618 unsigned int x = 0;
619 c = Py_CHARMASK(*s);
620 s++;
621 if (isdigit(c))
622 x = c - '0';
623 else if (islower(c))
624 x = 10 + c - 'a';
625 else
626 x = 10 + c - 'A';
627 x = x << 4;
628 c = Py_CHARMASK(*s);
629 s++;
630 if (isdigit(c))
631 x += c - '0';
632 else if (islower(c))
633 x += 10 + c - 'a';
634 else
635 x += 10 + c - 'A';
636 *p++ = x;
637 break;
638 }
639 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000640 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000641 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000642 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000643 }
644 if (strcmp(errors, "replace") == 0) {
645 *p++ = '?';
646 } else if (strcmp(errors, "ignore") == 0)
647 /* do nothing */;
648 else {
649 PyErr_Format(PyExc_ValueError,
650 "decoding error; "
651 "unknown error handling code: %.400s",
652 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655#ifndef Py_USING_UNICODE
656 case 'u':
657 case 'U':
658 case 'N':
659 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000660 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000661 "Unicode escapes not legal "
662 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000663 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000664 }
665#endif
666 default:
667 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000668 s--;
669 goto non_esc; /* an arbitry number of unescaped
670 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000671 }
672 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000673 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000674 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 return v;
676 failed:
677 Py_DECREF(v);
678 return NULL;
679}
680
Martin v. Löwis18e16552006-02-15 17:27:45 +0000681static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000682string_getsize(register PyObject *op)
683{
684 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000686 if (PyString_AsStringAndSize(op, &s, &len))
687 return -1;
688 return len;
689}
690
691static /*const*/ char *
692string_getbuffer(register PyObject *op)
693{
694 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696 if (PyString_AsStringAndSize(op, &s, &len))
697 return NULL;
698 return s;
699}
700
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000702PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000703{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000704 if (!PyString_Check(op))
705 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000706 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000707}
708
709/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000710PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000711{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712 if (!PyString_Check(op))
713 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000714 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000715}
716
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000717int
718PyString_AsStringAndSize(register PyObject *obj,
719 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000720 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000721{
722 if (s == NULL) {
723 PyErr_BadInternalCall();
724 return -1;
725 }
726
727 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000728#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000729 if (PyUnicode_Check(obj)) {
730 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
731 if (obj == NULL)
732 return -1;
733 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000734 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000735#endif
736 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737 PyErr_Format(PyExc_TypeError,
738 "expected string or Unicode object, "
739 "%.200s found", obj->ob_type->tp_name);
740 return -1;
741 }
742 }
743
744 *s = PyString_AS_STRING(obj);
745 if (len != NULL)
746 *len = PyString_GET_SIZE(obj);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000747 else if (strlen(*s) != PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000748 PyErr_SetString(PyExc_TypeError,
749 "expected string without null bytes");
750 return -1;
751 }
752 return 0;
753}
754
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000755/* Methods */
756
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000757static int
Fred Drakeba096332000-07-09 07:04:36 +0000758string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000759{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000760 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000761 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000762 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000763
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000764 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000765 if (! PyString_CheckExact(op)) {
766 int ret;
767 /* A str subclass may have its own __str__ method. */
768 op = (PyStringObject *) PyObject_Str((PyObject *)op);
769 if (op == NULL)
770 return -1;
771 ret = string_print(op, fp, flags);
772 Py_DECREF(op);
773 return ret;
774 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000775 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000776#ifdef __VMS
777 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
778#else
779 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
780#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000781 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000783
Thomas Wouters7e474022000-07-16 12:04:32 +0000784 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000785 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000786 if (memchr(op->ob_sval, '\'', op->ob_size) &&
787 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000788 quote = '"';
789
790 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000791 for (i = 0; i < op->ob_size; i++) {
792 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000794 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000795 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000796 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000797 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000798 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000799 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000800 fprintf(fp, "\\r");
801 else if (c < ' ' || c >= 0x7f)
802 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000803 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000804 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000805 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000806 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000807 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000808}
809
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000810PyObject *
811PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000812{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000813 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000814 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000815 PyObject *v;
816 if (newsize > INT_MAX) {
817 PyErr_SetString(PyExc_OverflowError,
818 "string is too large to make repr");
819 }
820 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000821 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000822 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823 }
824 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000825 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000826 register char c;
827 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 int quote;
829
Thomas Wouters7e474022000-07-16 12:04:32 +0000830 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000832 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000833 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000834 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000835 quote = '"';
836
Tim Peters9161c8b2001-12-03 01:55:38 +0000837 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000838 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000839 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000840 /* There's at least enough room for a hex escape
841 and a closing quote. */
842 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000844 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000845 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000846 else if (c == '\t')
847 *p++ = '\\', *p++ = 't';
848 else if (c == '\n')
849 *p++ = '\\', *p++ = 'n';
850 else if (c == '\r')
851 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000852 else if (c < ' ' || c >= 0x7f) {
853 /* For performance, we don't want to call
854 PyOS_snprintf here (extra layers of
855 function call). */
856 sprintf(p, "\\x%02x", c & 0xff);
857 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000858 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000859 else
860 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000862 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000863 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000864 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000865 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000866 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000867 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000868 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869}
870
Guido van Rossum189f1df2001-05-01 16:51:53 +0000871static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000872string_repr(PyObject *op)
873{
874 return PyString_Repr(op, 1);
875}
876
877static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000878string_str(PyObject *s)
879{
Tim Petersc9933152001-10-16 20:18:24 +0000880 assert(PyString_Check(s));
881 if (PyString_CheckExact(s)) {
882 Py_INCREF(s);
883 return s;
884 }
885 else {
886 /* Subtype -- return genuine string with the same value. */
887 PyStringObject *t = (PyStringObject *) s;
888 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
889 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000890}
891
Martin v. Löwis18e16552006-02-15 17:27:45 +0000892static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000893string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000894{
895 return a->ob_size;
896}
897
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000898static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000899string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000901 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000902 register PyStringObject *op;
903 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000904#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000905 if (PyUnicode_Check(bb))
906 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000907#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000908 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000909 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000910 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 return NULL;
912 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000914 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000915 if ((a->ob_size == 0 || b->ob_size == 0) &&
916 PyString_CheckExact(a) && PyString_CheckExact(b)) {
917 if (a->ob_size == 0) {
918 Py_INCREF(bb);
919 return bb;
920 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000921 Py_INCREF(a);
922 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923 }
924 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000925 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000926 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000927 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000928 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000929 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000930 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000931 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000932 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000933 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
934 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000935 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000936 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937#undef b
938}
939
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000940static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000941string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000943 register Py_ssize_t i;
944 register Py_ssize_t j;
945 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000946 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000947 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948 if (n < 0)
949 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000950 /* watch out for overflows: the size can overflow int,
951 * and the # of bytes needed can overflow size_t
952 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000953 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000954 if (n && size / n != a->ob_size) {
955 PyErr_SetString(PyExc_OverflowError,
956 "repeated string is too long");
957 return NULL;
958 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000959 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000960 Py_INCREF(a);
961 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000962 }
Tim Peterse7c05322004-06-27 17:24:49 +0000963 nbytes = (size_t)size;
964 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000965 PyErr_SetString(PyExc_OverflowError,
966 "repeated string is too long");
967 return NULL;
968 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000969 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000970 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000971 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000973 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000974 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000975 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000976 op->ob_sval[size] = '\0';
977 if (a->ob_size == 1 && n > 0) {
978 memset(op->ob_sval, a->ob_sval[0] , n);
979 return (PyObject *) op;
980 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000981 i = 0;
982 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000983 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
984 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000985 }
986 while (i < size) {
987 j = (i <= size-i) ? i : size-i;
988 memcpy(op->ob_sval+i, op->ob_sval, j);
989 i += j;
990 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000991 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992}
993
994/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
995
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +0000997string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000998 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +0000999 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001000{
1001 if (i < 0)
1002 i = 0;
1003 if (j < 0)
1004 j = 0; /* Avoid signed/unsigned bug in next line */
1005 if (j > a->ob_size)
1006 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001007 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1008 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001009 Py_INCREF(a);
1010 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001011 }
1012 if (j < i)
1013 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001014 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001015}
1016
Guido van Rossum9284a572000-03-07 15:53:43 +00001017static int
Fred Drakeba096332000-07-09 07:04:36 +00001018string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001019{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001020 char *s = PyString_AS_STRING(a);
1021 const char *sub = PyString_AS_STRING(el);
1022 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001023 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001024 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001025 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001026
1027 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001028#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001029 if (PyUnicode_Check(el))
1030 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001031#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001032 if (!PyString_Check(el)) {
1033 PyErr_SetString(PyExc_TypeError,
1034 "'in <string>' requires string as left operand");
1035 return -1;
1036 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001037 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001038
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001039 if (len_sub == 0)
1040 return 1;
Tim Petersae1d0c92006-03-17 03:29:34 +00001041 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001042 substring. When s<last, there is still room for a possible match
1043 and s[0] through s[len_sub-1] will be in bounds.
1044 shortsub is len_sub minus the last character which is checked
1045 separately just before the memcmp(). That check helps prevent
1046 false starts and saves the setup time for memcmp().
1047 */
1048 firstchar = sub[0];
1049 shortsub = len_sub - 1;
1050 lastchar = sub[shortsub];
1051 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1052 while (s < last) {
1053 s = memchr(s, firstchar, last-s);
1054 if (s == NULL)
1055 return 0;
1056 assert(s < last);
1057 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001058 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001059 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001060 }
1061 return 0;
1062}
1063
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001064static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001065string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001066{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001067 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001068 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001069 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001070 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001071 return NULL;
1072 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001073 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001074 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001075 if (v == NULL)
1076 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001077 else {
1078#ifdef COUNT_ALLOCS
1079 one_strings++;
1080#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001081 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001082 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001083 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001084}
1085
Martin v. Löwiscd353062001-05-24 16:56:35 +00001086static PyObject*
1087string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001089 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001090 Py_ssize_t len_a, len_b;
1091 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001092 PyObject *result;
1093
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001094 /* Make sure both arguments are strings. */
1095 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001096 result = Py_NotImplemented;
1097 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001098 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001099 if (a == b) {
1100 switch (op) {
1101 case Py_EQ:case Py_LE:case Py_GE:
1102 result = Py_True;
1103 goto out;
1104 case Py_NE:case Py_LT:case Py_GT:
1105 result = Py_False;
1106 goto out;
1107 }
1108 }
1109 if (op == Py_EQ) {
1110 /* Supporting Py_NE here as well does not save
1111 much time, since Py_NE is rarely used. */
1112 if (a->ob_size == b->ob_size
1113 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001114 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001115 a->ob_size) == 0)) {
1116 result = Py_True;
1117 } else {
1118 result = Py_False;
1119 }
1120 goto out;
1121 }
1122 len_a = a->ob_size; len_b = b->ob_size;
1123 min_len = (len_a < len_b) ? len_a : len_b;
1124 if (min_len > 0) {
1125 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1126 if (c==0)
1127 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1128 }else
1129 c = 0;
1130 if (c == 0)
1131 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1132 switch (op) {
1133 case Py_LT: c = c < 0; break;
1134 case Py_LE: c = c <= 0; break;
1135 case Py_EQ: assert(0); break; /* unreachable */
1136 case Py_NE: c = c != 0; break;
1137 case Py_GT: c = c > 0; break;
1138 case Py_GE: c = c >= 0; break;
1139 default:
1140 result = Py_NotImplemented;
1141 goto out;
1142 }
1143 result = c ? Py_True : Py_False;
1144 out:
1145 Py_INCREF(result);
1146 return result;
1147}
1148
1149int
1150_PyString_Eq(PyObject *o1, PyObject *o2)
1151{
1152 PyStringObject *a, *b;
1153 a = (PyStringObject*)o1;
1154 b = (PyStringObject*)o2;
1155 return a->ob_size == b->ob_size
1156 && *a->ob_sval == *b->ob_sval
1157 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001158}
1159
Guido van Rossum9bfef441993-03-29 10:43:31 +00001160static long
Fred Drakeba096332000-07-09 07:04:36 +00001161string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001162{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001163 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001164 register unsigned char *p;
1165 register long x;
1166
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001167 if (a->ob_shash != -1)
1168 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001169 len = a->ob_size;
1170 p = (unsigned char *) a->ob_sval;
1171 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001172 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001173 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001174 x ^= a->ob_size;
1175 if (x == -1)
1176 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001177 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178 return x;
1179}
1180
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001181#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1182
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001183static PyObject*
1184string_subscript(PyStringObject* self, PyObject* item)
1185{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001186 PyNumberMethods *nb = item->ob_type->tp_as_number;
1187 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1188 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001189 if (i == -1 && PyErr_Occurred())
1190 return NULL;
1191 if (i < 0)
1192 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001193 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001194 }
1195 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001196 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001197 char* source_buf;
1198 char* result_buf;
1199 PyObject* result;
1200
Tim Petersae1d0c92006-03-17 03:29:34 +00001201 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001202 PyString_GET_SIZE(self),
1203 &start, &stop, &step, &slicelength) < 0) {
1204 return NULL;
1205 }
1206
1207 if (slicelength <= 0) {
1208 return PyString_FromStringAndSize("", 0);
1209 }
1210 else {
1211 source_buf = PyString_AsString((PyObject*)self);
1212 result_buf = PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001213 if (result_buf == NULL)
1214 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215
Tim Petersae1d0c92006-03-17 03:29:34 +00001216 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 cur += step, i++) {
1218 result_buf[i] = source_buf[cur];
1219 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001220
1221 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001222 slicelength);
1223 PyMem_Free(result_buf);
1224 return result;
1225 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001226 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001227 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001228 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001229 "string indices must be integers");
1230 return NULL;
1231 }
1232}
1233
Martin v. Löwis18e16552006-02-15 17:27:45 +00001234static Py_ssize_t
1235string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001236{
1237 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001238 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001239 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001240 return -1;
1241 }
1242 *ptr = (void *)self->ob_sval;
1243 return self->ob_size;
1244}
1245
Martin v. Löwis18e16552006-02-15 17:27:45 +00001246static Py_ssize_t
1247string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001248{
Guido van Rossum045e6881997-09-08 18:30:11 +00001249 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001250 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001251 return -1;
1252}
1253
Martin v. Löwis18e16552006-02-15 17:27:45 +00001254static Py_ssize_t
1255string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001256{
1257 if ( lenp )
1258 *lenp = self->ob_size;
1259 return 1;
1260}
1261
Martin v. Löwis18e16552006-02-15 17:27:45 +00001262static Py_ssize_t
1263string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001264{
1265 if ( index != 0 ) {
1266 PyErr_SetString(PyExc_SystemError,
1267 "accessing non-existent string segment");
1268 return -1;
1269 }
1270 *ptr = self->ob_sval;
1271 return self->ob_size;
1272}
1273
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001274static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001275 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001276 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001277 (ssizeargfunc)string_repeat, /*sq_repeat*/
1278 (ssizeargfunc)string_item, /*sq_item*/
1279 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001280 0, /*sq_ass_item*/
1281 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001282 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001283};
1284
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001285static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001286 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001287 (binaryfunc)string_subscript,
1288 0,
1289};
1290
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001291static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292 (readbufferproc)string_buffer_getreadbuf,
1293 (writebufferproc)string_buffer_getwritebuf,
1294 (segcountproc)string_buffer_getsegcount,
1295 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001296};
1297
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001298
1299
/* Selectors for the three strip variants; they double as indexes into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
	"|O:lstrip",
	"|O:rstrip",
	"|O:strip"
};

/* Method name for strip variant i: the format string with its leading
   three characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001308
/* Helpers for the split functions.  Both build the substring
   data[left:right] and add it to the result list, jumping to the
   caller's onError label on failure.  They rely on the enclosing
   function declaring `PyObject *str', `PyObject *list' and an `onError:'
   label.  The list takes its own reference, so the temporary reference
   in str is dropped on both the success and the failure path.

   Each macro is wrapped in do { ... } while (0) so that it expands to a
   single statement and is safe after an unbraced if/else; invoke it
   with a trailing semicolon. */

/* Append data[left:right] to the end of list. */
#define SPLIT_APPEND(data, left, right)				\
	do {							\
		str = PyString_FromStringAndSize((data) + (left), \
						 (right) - (left)); \
		if (str == NULL)				\
			goto onError;				\
		if (PyList_Append(list, str)) {			\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		Py_DECREF(str);					\
	} while (0)

/* Insert data[left:right] at the front of list. */
#define SPLIT_INSERT(data, left, right)				\
	do {							\
		str = PyString_FromStringAndSize((data) + (left), \
						 (right) - (left)); \
		if (str == NULL)				\
			goto onError;				\
		if (PyList_Insert(list, 0, str)) {		\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		Py_DECREF(str);					\
	} while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332
1333static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001334split_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001335{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001336 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001337 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338 PyObject *list = PyList_New(0);
1339
1340 if (list == NULL)
1341 return NULL;
1342
Guido van Rossum4c08d552000-03-10 22:55:18 +00001343 for (i = j = 0; i < len; ) {
1344 while (i < len && isspace(Py_CHARMASK(s[i])))
1345 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001347 while (i < len && !isspace(Py_CHARMASK(s[i])))
1348 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001350 if (maxsplit-- <= 0)
1351 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001352 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001353 while (i < len && isspace(Py_CHARMASK(s[i])))
1354 i++;
1355 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001356 }
1357 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001359 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001360 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001361 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001362 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001363 Py_DECREF(list);
1364 return NULL;
1365}
1366
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001367static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001368split_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001369{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001370 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001371 PyObject *str;
1372 PyObject *list = PyList_New(0);
1373
1374 if (list == NULL)
1375 return NULL;
1376
1377 for (i = j = 0; i < len; ) {
1378 if (s[i] == ch) {
1379 if (maxcount-- <= 0)
1380 break;
1381 SPLIT_APPEND(s, j, i);
1382 i = j = i + 1;
1383 } else
1384 i++;
1385 }
1386 if (j <= len) {
1387 SPLIT_APPEND(s, j, len);
1388 }
1389 return list;
1390
1391 onError:
1392 Py_DECREF(list);
1393 return NULL;
1394}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001395
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001396PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001397"S.split([sep [,maxsplit]]) -> list of strings\n\
1398\n\
1399Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001400delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001401splits are done. If sep is not specified or is None, any\n\
1402whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403
1404static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001405string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001407 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1408 int err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001409 int maxsplit = -1;
1410 const char *s = PyString_AS_STRING(self), *sub;
1411 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412
Guido van Rossum4c08d552000-03-10 22:55:18 +00001413 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001415 if (maxsplit < 0)
1416 maxsplit = INT_MAX;
1417 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001419 if (PyString_Check(subobj)) {
1420 sub = PyString_AS_STRING(subobj);
1421 n = PyString_GET_SIZE(subobj);
1422 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001423#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001424 else if (PyUnicode_Check(subobj))
1425 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001426#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001427 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1428 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001429
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001430 if (n == 0) {
1431 PyErr_SetString(PyExc_ValueError, "empty separator");
1432 return NULL;
1433 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001434 else if (n == 1)
1435 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001436
1437 list = PyList_New(0);
1438 if (list == NULL)
1439 return NULL;
1440
1441 i = j = 0;
1442 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001443 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001444 if (maxsplit-- <= 0)
1445 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001446 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447 if (item == NULL)
1448 goto fail;
1449 err = PyList_Append(list, item);
1450 Py_DECREF(item);
1451 if (err < 0)
1452 goto fail;
1453 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454 }
1455 else
1456 i++;
1457 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001458 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459 if (item == NULL)
1460 goto fail;
1461 err = PyList_Append(list, item);
1462 Py_DECREF(item);
1463 if (err < 0)
1464 goto fail;
1465
1466 return list;
1467
1468 fail:
1469 Py_DECREF(list);
1470 return NULL;
1471}
1472
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001473static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001474rsplit_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001475{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001476 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001478 PyObject *list = PyList_New(0);
1479
1480 if (list == NULL)
1481 return NULL;
1482
1483 for (i = j = len - 1; i >= 0; ) {
1484 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1485 i--;
1486 j = i;
1487 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1488 i--;
1489 if (j > i) {
1490 if (maxsplit-- <= 0)
1491 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001492 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001493 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1494 i--;
1495 j = i;
1496 }
1497 }
1498 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001499 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001500 }
1501 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001502 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001503 Py_DECREF(list);
1504 return NULL;
1505}
1506
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001507static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001508rsplit_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001509{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001510 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001511 PyObject *str;
1512 PyObject *list = PyList_New(0);
1513
1514 if (list == NULL)
1515 return NULL;
1516
1517 for (i = j = len - 1; i >= 0; ) {
1518 if (s[i] == ch) {
1519 if (maxcount-- <= 0)
1520 break;
1521 SPLIT_INSERT(s, i + 1, j + 1);
1522 j = i = i - 1;
1523 } else
1524 i--;
1525 }
1526 if (j >= -1) {
1527 SPLIT_INSERT(s, 0, j + 1);
1528 }
1529 return list;
1530
1531 onError:
1532 Py_DECREF(list);
1533 return NULL;
1534}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001535
1536PyDoc_STRVAR(rsplit__doc__,
1537"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1538\n\
1539Return a list of the words in the string S, using sep as the\n\
1540delimiter string, starting at the end of the string and working\n\
1541to the front. If maxsplit is given, at most maxsplit splits are\n\
1542done. If sep is not specified or is None, any whitespace string\n\
1543is a separator.");
1544
1545static PyObject *
1546string_rsplit(PyStringObject *self, PyObject *args)
1547{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001548 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1549 int err;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001550 int maxsplit = -1;
1551 const char *s = PyString_AS_STRING(self), *sub;
1552 PyObject *list, *item, *subobj = Py_None;
1553
1554 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1555 return NULL;
1556 if (maxsplit < 0)
1557 maxsplit = INT_MAX;
1558 if (subobj == Py_None)
1559 return rsplit_whitespace(s, len, maxsplit);
1560 if (PyString_Check(subobj)) {
1561 sub = PyString_AS_STRING(subobj);
1562 n = PyString_GET_SIZE(subobj);
1563 }
1564#ifdef Py_USING_UNICODE
1565 else if (PyUnicode_Check(subobj))
1566 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1567#endif
1568 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1569 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001570
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001571 if (n == 0) {
1572 PyErr_SetString(PyExc_ValueError, "empty separator");
1573 return NULL;
1574 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001575 else if (n == 1)
1576 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001577
1578 list = PyList_New(0);
1579 if (list == NULL)
1580 return NULL;
1581
1582 j = len;
1583 i = j - n;
1584 while (i >= 0) {
1585 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1586 if (maxsplit-- <= 0)
1587 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001588 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001589 if (item == NULL)
1590 goto fail;
1591 err = PyList_Insert(list, 0, item);
1592 Py_DECREF(item);
1593 if (err < 0)
1594 goto fail;
1595 j = i;
1596 i -= n;
1597 }
1598 else
1599 i--;
1600 }
1601 item = PyString_FromStringAndSize(s, j);
1602 if (item == NULL)
1603 goto fail;
1604 err = PyList_Insert(list, 0, item);
1605 Py_DECREF(item);
1606 if (err < 0)
1607 goto fail;
1608
1609 return list;
1610
1611 fail:
1612 Py_DECREF(list);
1613 return NULL;
1614}
1615
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001616
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001617PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618"S.join(sequence) -> string\n\
1619\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001620Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001621sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001622
1623static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001624string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625{
1626 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001627 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001628 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001630 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001631 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001632 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001633 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634
Tim Peters19fe14e2001-01-19 03:03:47 +00001635 seq = PySequence_Fast(orig, "");
1636 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001637 return NULL;
1638 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001639
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001640 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001641 if (seqlen == 0) {
1642 Py_DECREF(seq);
1643 return PyString_FromString("");
1644 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001646 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001647 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1648 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001649 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001650 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001651 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001652 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001653
Raymond Hettinger674f2412004-08-23 23:23:54 +00001654 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001655 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001656 * Do a pre-pass to figure out the total amount of space we'll
1657 * need (sz), see whether any argument is absurd, and defer to
1658 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001659 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001660 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001661 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001662 item = PySequence_Fast_GET_ITEM(seq, i);
1663 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001664#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001665 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001666 /* Defer to Unicode join.
1667 * CAUTION: There's no gurantee that the
1668 * original sequence can be iterated over
1669 * again, so we must pass seq here.
1670 */
1671 PyObject *result;
1672 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001673 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001674 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001675 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001676#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001677 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001678 "sequence item %i: expected string,"
1679 " %.80s found",
Martin v. Löwis18e16552006-02-15 17:27:45 +00001680 /*XXX*/(int)i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001681 Py_DECREF(seq);
1682 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001683 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001684 sz += PyString_GET_SIZE(item);
1685 if (i != 0)
1686 sz += seplen;
1687 if (sz < old_sz || sz > INT_MAX) {
1688 PyErr_SetString(PyExc_OverflowError,
1689 "join() is too long for a Python string");
1690 Py_DECREF(seq);
1691 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001693 }
1694
1695 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001696 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001697 if (res == NULL) {
1698 Py_DECREF(seq);
1699 return NULL;
1700 }
1701
1702 /* Catenate everything. */
1703 p = PyString_AS_STRING(res);
1704 for (i = 0; i < seqlen; ++i) {
1705 size_t n;
1706 item = PySequence_Fast_GET_ITEM(seq, i);
1707 n = PyString_GET_SIZE(item);
1708 memcpy(p, PyString_AS_STRING(item), n);
1709 p += n;
1710 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001711 memcpy(p, sep, seplen);
1712 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001713 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001714 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001715
Jeremy Hylton49048292000-07-11 03:28:17 +00001716 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718}
1719
Tim Peters52e155e2001-06-16 05:42:57 +00001720PyObject *
1721_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001722{
Tim Petersa7259592001-06-16 05:11:17 +00001723 assert(sep != NULL && PyString_Check(sep));
1724 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001725 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001726}
1727
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001728static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001729string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001730{
1731 if (*end > len)
1732 *end = len;
1733 else if (*end < 0)
1734 *end += len;
1735 if (*end < 0)
1736 *end = 0;
1737 if (*start < 0)
1738 *start += len;
1739 if (*start < 0)
1740 *start = 0;
1741}
1742
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001744string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001746 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001747 Py_ssize_t len = PyString_GET_SIZE(self);
1748 Py_ssize_t n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001749 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001750
Martin v. Löwis18e16552006-02-15 17:27:45 +00001751 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001752 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001753 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001754 return -2;
1755 if (PyString_Check(subobj)) {
1756 sub = PyString_AS_STRING(subobj);
1757 n = PyString_GET_SIZE(subobj);
1758 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001759#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001761 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001762#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001763 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764 return -2;
1765
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001766 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767
Guido van Rossum4c08d552000-03-10 22:55:18 +00001768 if (dir > 0) {
1769 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001771 last -= n;
1772 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001773 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001774 return (long)i;
1775 }
1776 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001777 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001778
Guido van Rossum4c08d552000-03-10 22:55:18 +00001779 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001780 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001781 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001782 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001783 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001784 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001785
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786 return -1;
1787}
1788
1789
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001790PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791"S.find(sub [,start [,end]]) -> int\n\
1792\n\
1793Return the lowest index in S where substring sub is found,\n\
1794such that sub is contained within s[start,end]. Optional\n\
1795arguments start and end are interpreted as in slice notation.\n\
1796\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001797Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798
1799static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001800string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001802 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 if (result == -2)
1804 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001805 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806}
1807
1808
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001809PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810"S.index(sub [,start [,end]]) -> int\n\
1811\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001812Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813
1814static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001815string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001817 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818 if (result == -2)
1819 return NULL;
1820 if (result == -1) {
1821 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001822 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001823 return NULL;
1824 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001825 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826}
1827
1828
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001829PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830"S.rfind(sub [,start [,end]]) -> int\n\
1831\n\
1832Return the highest index in S where substring sub is found,\n\
1833such that sub is contained within s[start,end]. Optional\n\
1834arguments start and end are interpreted as in slice notation.\n\
1835\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001836Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001837
1838static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001839string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001840{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001841 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842 if (result == -2)
1843 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001844 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001845}
1846
1847
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001848PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849"S.rindex(sub [,start [,end]]) -> int\n\
1850\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001851Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852
1853static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001854string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001855{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001856 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001857 if (result == -2)
1858 return NULL;
1859 if (result == -1) {
1860 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001861 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001862 return NULL;
1863 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001864 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001865}
1866
1867
1868static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001869do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1870{
1871 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001872 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001873 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001874 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1875 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001876
1877 i = 0;
1878 if (striptype != RIGHTSTRIP) {
1879 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1880 i++;
1881 }
1882 }
1883
1884 j = len;
1885 if (striptype != LEFTSTRIP) {
1886 do {
1887 j--;
1888 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1889 j++;
1890 }
1891
1892 if (i == 0 && j == len && PyString_CheckExact(self)) {
1893 Py_INCREF(self);
1894 return (PyObject*)self;
1895 }
1896 else
1897 return PyString_FromStringAndSize(s+i, j-i);
1898}
1899
1900
1901static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001902do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
1904 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001905 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907 i = 0;
1908 if (striptype != RIGHTSTRIP) {
1909 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1910 i++;
1911 }
1912 }
1913
1914 j = len;
1915 if (striptype != LEFTSTRIP) {
1916 do {
1917 j--;
1918 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1919 j++;
1920 }
1921
Tim Peters8fa5dd02001-09-12 02:18:30 +00001922 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923 Py_INCREF(self);
1924 return (PyObject*)self;
1925 }
1926 else
1927 return PyString_FromStringAndSize(s+i, j-i);
1928}
1929
1930
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001931static PyObject *
1932do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1933{
1934 PyObject *sep = NULL;
1935
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001936 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001937 return NULL;
1938
1939 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001940 if (PyString_Check(sep))
1941 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001942#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001943 else if (PyUnicode_Check(sep)) {
1944 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1945 PyObject *res;
1946 if (uniself==NULL)
1947 return NULL;
1948 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1949 striptype, sep);
1950 Py_DECREF(uniself);
1951 return res;
1952 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001953#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001954 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001955 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001956#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001957 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001958#else
1959 "%s arg must be None or str",
1960#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001961 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001962 return NULL;
1963 }
1964 return do_xstrip(self, striptype, sep);
1965 }
1966
1967 return do_strip(self, striptype);
1968}
1969
1970
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001971PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001972"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001973\n\
1974Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001975whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001976If chars is given and not None, remove characters in chars instead.\n\
1977If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001978
1979static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001980string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001982 if (PyTuple_GET_SIZE(args) == 0)
1983 return do_strip(self, BOTHSTRIP); /* Common case */
1984 else
1985 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986}
1987
1988
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001989PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001990"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001991\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001992Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001993If chars is given and not None, remove characters in chars instead.\n\
1994If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001995
1996static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001997string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001999 if (PyTuple_GET_SIZE(args) == 0)
2000 return do_strip(self, LEFTSTRIP); /* Common case */
2001 else
2002 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003}
2004
2005
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002006PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002007"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002009Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002010If chars is given and not None, remove characters in chars instead.\n\
2011If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012
2013static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002014string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002016 if (PyTuple_GET_SIZE(args) == 0)
2017 return do_strip(self, RIGHTSTRIP); /* Common case */
2018 else
2019 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020}
2021
2022
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002023PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002024"S.lower() -> string\n\
2025\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002026Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002027
2028static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002029string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002030{
2031 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002032 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033 PyObject *new;
2034
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035 new = PyString_FromStringAndSize(NULL, n);
2036 if (new == NULL)
2037 return NULL;
2038 s_new = PyString_AsString(new);
2039 for (i = 0; i < n; i++) {
2040 int c = Py_CHARMASK(*s++);
2041 if (isupper(c)) {
2042 *s_new = tolower(c);
2043 } else
2044 *s_new = c;
2045 s_new++;
2046 }
2047 return new;
2048}
2049
2050
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002051PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052"S.upper() -> string\n\
2053\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002054Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002055
2056static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002057string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002058{
2059 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002060 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061 PyObject *new;
2062
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063 new = PyString_FromStringAndSize(NULL, n);
2064 if (new == NULL)
2065 return NULL;
2066 s_new = PyString_AsString(new);
2067 for (i = 0; i < n; i++) {
2068 int c = Py_CHARMASK(*s++);
2069 if (islower(c)) {
2070 *s_new = toupper(c);
2071 } else
2072 *s_new = c;
2073 s_new++;
2074 }
2075 return new;
2076}
2077
2078
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002079PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080"S.title() -> string\n\
2081\n\
2082Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002083characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002084
2085static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002086string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002087{
2088 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002089 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002090 int previous_is_cased = 0;
2091 PyObject *new;
2092
Guido van Rossum4c08d552000-03-10 22:55:18 +00002093 new = PyString_FromStringAndSize(NULL, n);
2094 if (new == NULL)
2095 return NULL;
2096 s_new = PyString_AsString(new);
2097 for (i = 0; i < n; i++) {
2098 int c = Py_CHARMASK(*s++);
2099 if (islower(c)) {
2100 if (!previous_is_cased)
2101 c = toupper(c);
2102 previous_is_cased = 1;
2103 } else if (isupper(c)) {
2104 if (previous_is_cased)
2105 c = tolower(c);
2106 previous_is_cased = 1;
2107 } else
2108 previous_is_cased = 0;
2109 *s_new++ = c;
2110 }
2111 return new;
2112}
2113
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002114PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115"S.capitalize() -> string\n\
2116\n\
2117Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002118capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119
2120static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002121string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122{
2123 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002124 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125 PyObject *new;
2126
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127 new = PyString_FromStringAndSize(NULL, n);
2128 if (new == NULL)
2129 return NULL;
2130 s_new = PyString_AsString(new);
2131 if (0 < n) {
2132 int c = Py_CHARMASK(*s++);
2133 if (islower(c))
2134 *s_new = toupper(c);
2135 else
2136 *s_new = c;
2137 s_new++;
2138 }
2139 for (i = 1; i < n; i++) {
2140 int c = Py_CHARMASK(*s++);
2141 if (isupper(c))
2142 *s_new = tolower(c);
2143 else
2144 *s_new = c;
2145 s_new++;
2146 }
2147 return new;
2148}
2149
2150
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002151PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152"S.count(sub[, start[, end]]) -> int\n\
2153\n\
2154Return the number of occurrences of substring sub in string\n\
2155S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002156interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157
2158static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002159string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002161 const char *s = PyString_AS_STRING(self), *sub, *t;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002162 Py_ssize_t len = PyString_GET_SIZE(self), n;
2163 Py_ssize_t i = 0, last = INT_MAX;
2164 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002165 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166
Guido van Rossumc6821402000-05-08 14:08:05 +00002167 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2168 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002170
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 if (PyString_Check(subobj)) {
2172 sub = PyString_AS_STRING(subobj);
2173 n = PyString_GET_SIZE(subobj);
2174 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002175#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002176 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002177 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002178 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2179 if (count == -1)
2180 return NULL;
2181 else
2182 return PyInt_FromLong((long) count);
2183 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002184#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002185 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2186 return NULL;
2187
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002188 string_adjust_indices(&i, &last, len);
2189
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002190 m = last + 1 - n;
2191 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002192 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002193
2194 r = 0;
2195 while (i < m) {
2196 if (!memcmp(s+i, sub, n)) {
2197 r++;
2198 i += n;
2199 } else {
2200 i++;
2201 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002202 if (i >= m)
2203 break;
2204 t = memchr(s+i, sub[0], m-i);
2205 if (t == NULL)
2206 break;
2207 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002208 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002209 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002210}
2211
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002212PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213"S.swapcase() -> string\n\
2214\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002215Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002216converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217
2218static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002219string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002220{
2221 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002222 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002223 PyObject *new;
2224
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225 new = PyString_FromStringAndSize(NULL, n);
2226 if (new == NULL)
2227 return NULL;
2228 s_new = PyString_AsString(new);
2229 for (i = 0; i < n; i++) {
2230 int c = Py_CHARMASK(*s++);
2231 if (islower(c)) {
2232 *s_new = toupper(c);
2233 }
2234 else if (isupper(c)) {
2235 *s_new = tolower(c);
2236 }
2237 else
2238 *s_new = c;
2239 s_new++;
2240 }
2241 return new;
2242}
2243
2244
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002245PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246"S.translate(table [,deletechars]) -> string\n\
2247\n\
2248Return a copy of the string S, where all characters occurring\n\
2249in the optional argument deletechars are removed, and the\n\
2250remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002251translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002252
2253static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002254string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256 register char *input, *output;
2257 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002258 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002260 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002261 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262 PyObject *result;
2263 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002266 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002267 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002268 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269
2270 if (PyString_Check(tableobj)) {
2271 table1 = PyString_AS_STRING(tableobj);
2272 tablen = PyString_GET_SIZE(tableobj);
2273 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002274#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002275 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002276 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002277 parameter; instead a mapping to None will cause characters
2278 to be deleted. */
2279 if (delobj != NULL) {
2280 PyErr_SetString(PyExc_TypeError,
2281 "deletions are implemented differently for unicode");
2282 return NULL;
2283 }
2284 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2285 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002286#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002288 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002289
Martin v. Löwis00b61272002-12-12 20:03:19 +00002290 if (tablen != 256) {
2291 PyErr_SetString(PyExc_ValueError,
2292 "translation table must be 256 characters long");
2293 return NULL;
2294 }
2295
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296 if (delobj != NULL) {
2297 if (PyString_Check(delobj)) {
2298 del_table = PyString_AS_STRING(delobj);
2299 dellen = PyString_GET_SIZE(delobj);
2300 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002301#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002302 else if (PyUnicode_Check(delobj)) {
2303 PyErr_SetString(PyExc_TypeError,
2304 "deletions are implemented differently for unicode");
2305 return NULL;
2306 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002307#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2309 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002310 }
2311 else {
2312 del_table = NULL;
2313 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314 }
2315
2316 table = table1;
2317 inlen = PyString_Size(input_obj);
2318 result = PyString_FromStringAndSize((char *)NULL, inlen);
2319 if (result == NULL)
2320 return NULL;
2321 output_start = output = PyString_AsString(result);
2322 input = PyString_AsString(input_obj);
2323
2324 if (dellen == 0) {
2325 /* If no deletions are required, use faster code */
2326 for (i = inlen; --i >= 0; ) {
2327 c = Py_CHARMASK(*input++);
2328 if (Py_CHARMASK((*output++ = table[c])) != c)
2329 changed = 1;
2330 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002331 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332 return result;
2333 Py_DECREF(result);
2334 Py_INCREF(input_obj);
2335 return input_obj;
2336 }
2337
2338 for (i = 0; i < 256; i++)
2339 trans_table[i] = Py_CHARMASK(table[i]);
2340
2341 for (i = 0; i < dellen; i++)
2342 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2343
2344 for (i = inlen; --i >= 0; ) {
2345 c = Py_CHARMASK(*input++);
2346 if (trans_table[c] != -1)
2347 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2348 continue;
2349 changed = 1;
2350 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002351 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352 Py_DECREF(result);
2353 Py_INCREF(input_obj);
2354 return input_obj;
2355 }
2356 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002357 if (inlen > 0)
2358 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002359 return result;
2360}
2361
2362
2363/* What follows is used for implementing replace(). Perry Stoll. */
2364
2365/*
2366 mymemfind
2367
2368 strstr replacement for arbitrary blocks of memory.
2369
Barry Warsaw51ac5802000-03-20 16:36:48 +00002370 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002371 contents of memory pointed to by PAT. Returns the index into MEM if
2372 found, or -1 if not found. If len of PAT is greater than length of
2373 MEM, the function returns -1.
2374*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00002375static Py_ssize_t
2376mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002377{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002378 register Py_ssize_t ii;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379
2380 /* pattern can not occur in the last pat_len-1 chars */
2381 len -= pat_len;
2382
2383 for (ii = 0; ii <= len; ii++) {
Fred Drake396f6e02000-06-20 15:47:54 +00002384 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385 return ii;
2386 }
2387 }
2388 return -1;
2389}
2390
2391/*
2392 mymemcnt
2393
2394 Return the number of distinct times PAT is found in MEM.
2395 meaning mem=1111 and pat==11 returns 2.
2396 mem=11111 and pat==11 also return 2.
2397 */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002398static Py_ssize_t
2399mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002400{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002401 register Py_ssize_t offset = 0;
2402 Py_ssize_t nfound = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403
2404 while (len >= 0) {
2405 offset = mymemfind(mem, len, pat, pat_len);
2406 if (offset == -1)
2407 break;
2408 mem += offset + pat_len;
2409 len -= offset + pat_len;
2410 nfound++;
2411 }
2412 return nfound;
2413}
2414
2415/*
2416 mymemreplace
2417
Thomas Wouters7e474022000-07-16 12:04:32 +00002418 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419 replaced with SUB.
2420
Thomas Wouters7e474022000-07-16 12:04:32 +00002421 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422 of PAT in STR, then the original string is returned. Otherwise, a new
2423 string is allocated here and returned.
2424
2425 on return, out_len is:
2426 the length of output string, or
2427 -1 if the input string is returned, or
2428 unchanged if an error occurs (no memory).
2429
2430 return value is:
2431 the new string allocated locally, or
2432 NULL if an error occurred.
2433*/
2434static char *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002435mymemreplace(const char *str, Py_ssize_t len, /* input string */
2436 const char *pat, Py_ssize_t pat_len, /* pattern string to find */
2437 const char *sub, Py_ssize_t sub_len, /* substitution string */
2438 Py_ssize_t count, /* number of replacements */
2439 Py_ssize_t *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002440{
2441 char *out_s;
2442 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002443 Py_ssize_t nfound, offset, new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002444
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002445 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 goto return_same;
2447
2448 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002449 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002450 if (count < 0)
2451 count = INT_MAX;
2452 else if (nfound > count)
2453 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002454 if (nfound == 0)
2455 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002456
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002457 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002458 if (new_len == 0) {
2459 /* Have to allocate something for the caller to free(). */
2460 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002461 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002462 return NULL;
2463 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002464 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002465 else {
2466 assert(new_len > 0);
2467 new_s = (char *)PyMem_MALLOC(new_len);
2468 if (new_s == NULL)
2469 return NULL;
2470 out_s = new_s;
2471
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002472 if (pat_len > 0) {
2473 for (; nfound > 0; --nfound) {
2474 /* find index of next instance of pattern */
2475 offset = mymemfind(str, len, pat, pat_len);
2476 if (offset == -1)
2477 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002478
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002479 /* copy non matching part of input string */
2480 memcpy(new_s, str, offset);
2481 str += offset + pat_len;
2482 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002483
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002484 /* copy substitute into the output string */
2485 new_s += offset;
2486 memcpy(new_s, sub, sub_len);
2487 new_s += sub_len;
2488 }
2489 /* copy any remaining values into output string */
2490 if (len > 0)
2491 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002492 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002493 else {
2494 for (;;++str, --len) {
2495 memcpy(new_s, sub, sub_len);
2496 new_s += sub_len;
2497 if (--nfound <= 0) {
2498 memcpy(new_s, str, len);
2499 break;
2500 }
2501 *new_s++ = *str;
2502 }
2503 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002504 }
2505 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506 return out_s;
2507
2508 return_same:
2509 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002510 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002511}
2512
2513
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002514PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002515"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002516\n\
2517Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002518old replaced by new. If the optional argument count is\n\
2519given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002520
2521static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002522string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002523{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002524 const char *str = PyString_AS_STRING(self), *sub, *repl;
2525 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002526 const Py_ssize_t len = PyString_GET_SIZE(self);
2527 Py_ssize_t sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002528 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002531
Guido van Rossum4c08d552000-03-10 22:55:18 +00002532 if (!PyArg_ParseTuple(args, "OO|i:replace",
2533 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002534 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002535
2536 if (PyString_Check(subobj)) {
2537 sub = PyString_AS_STRING(subobj);
2538 sub_len = PyString_GET_SIZE(subobj);
2539 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002540#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002541 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002542 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002543 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002544#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002545 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2546 return NULL;
2547
2548 if (PyString_Check(replobj)) {
2549 repl = PyString_AS_STRING(replobj);
2550 repl_len = PyString_GET_SIZE(replobj);
2551 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002552#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002553 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002554 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002555 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002556#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002557 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2558 return NULL;
2559
Guido van Rossum4c08d552000-03-10 22:55:18 +00002560 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002561 if (new_s == NULL) {
2562 PyErr_NoMemory();
2563 return NULL;
2564 }
2565 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002566 if (PyString_CheckExact(self)) {
2567 /* we're returning another reference to self */
2568 new = (PyObject*)self;
2569 Py_INCREF(new);
2570 }
2571 else {
2572 new = PyString_FromStringAndSize(str, len);
2573 if (new == NULL)
2574 return NULL;
2575 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002576 }
2577 else {
2578 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002579 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002580 }
2581 return new;
2582}
2583
2584
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002585PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002586"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002587\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002588Return True if S starts with the specified prefix, False otherwise.\n\
2589With optional start, test S beginning at that position.\n\
2590With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002591
2592static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002593string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002594{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002595 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002596 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002597 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002598 Py_ssize_t plen;
2599 Py_ssize_t start = 0;
2600 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002601 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002602
Guido van Rossumc6821402000-05-08 14:08:05 +00002603 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2604 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002605 return NULL;
2606 if (PyString_Check(subobj)) {
2607 prefix = PyString_AS_STRING(subobj);
2608 plen = PyString_GET_SIZE(subobj);
2609 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002610#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002611 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002612 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002613 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002614 subobj, start, end, -1);
2615 if (rc == -1)
2616 return NULL;
2617 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002618 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002619 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002620#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002621 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002622 return NULL;
2623
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002624 string_adjust_indices(&start, &end, len);
2625
2626 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002627 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002628
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002629 if (end-start >= plen)
2630 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2631 else
2632 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002633}
2634
2635
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002636PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002637"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002638\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002639Return True if S ends with the specified suffix, False otherwise.\n\
2640With optional start, test S beginning at that position.\n\
2641With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002642
2643static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002644string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002645{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002647 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002648 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002649 Py_ssize_t slen;
2650 Py_ssize_t start = 0;
2651 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002652 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002653
Guido van Rossumc6821402000-05-08 14:08:05 +00002654 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2655 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002656 return NULL;
2657 if (PyString_Check(subobj)) {
2658 suffix = PyString_AS_STRING(subobj);
2659 slen = PyString_GET_SIZE(subobj);
2660 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002661#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002662 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002663 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002664 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002665 subobj, start, end, +1);
2666 if (rc == -1)
2667 return NULL;
2668 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002669 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002670 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002671#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002672 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002673 return NULL;
2674
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002675 string_adjust_indices(&start, &end, len);
2676
2677 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002678 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002679
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002680 if (end-slen > start)
2681 start = end - slen;
2682 if (end-start >= slen)
2683 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2684 else
2685 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002686}
2687
2688
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002689PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002690"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002691\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002692Encodes S using the codec registered for encoding. encoding defaults\n\
2693to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002694handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002695a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2696'xmlcharrefreplace' as well as any other name registered with\n\
2697codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002698
2699static PyObject *
2700string_encode(PyStringObject *self, PyObject *args)
2701{
2702 char *encoding = NULL;
2703 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002704 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00002705
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002706 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2707 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002708 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002709 if (v == NULL)
2710 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002711 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2712 PyErr_Format(PyExc_TypeError,
2713 "encoder did not return a string/unicode object "
2714 "(type=%.400s)",
2715 v->ob_type->tp_name);
2716 Py_DECREF(v);
2717 return NULL;
2718 }
2719 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002720
2721 onError:
2722 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002723}
2724
2725
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002726PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002727"S.decode([encoding[,errors]]) -> object\n\
2728\n\
2729Decodes S using the codec registered for encoding. encoding defaults\n\
2730to the default encoding. errors may be given to set a different error\n\
2731handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002732a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2733as well as any other name registerd with codecs.register_error that is\n\
2734able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002735
2736static PyObject *
2737string_decode(PyStringObject *self, PyObject *args)
2738{
2739 char *encoding = NULL;
2740 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002741 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00002742
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002743 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2744 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002745 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002746 if (v == NULL)
2747 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002748 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2749 PyErr_Format(PyExc_TypeError,
2750 "decoder did not return a string/unicode object "
2751 "(type=%.400s)",
2752 v->ob_type->tp_name);
2753 Py_DECREF(v);
2754 return NULL;
2755 }
2756 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002757
2758 onError:
2759 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002760}
2761
2762
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002763PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002764"S.expandtabs([tabsize]) -> string\n\
2765\n\
2766Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002767If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002768
2769static PyObject*
2770string_expandtabs(PyStringObject *self, PyObject *args)
2771{
2772 const char *e, *p;
2773 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002774 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002775 PyObject *u;
2776 int tabsize = 8;
2777
2778 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2779 return NULL;
2780
Thomas Wouters7e474022000-07-16 12:04:32 +00002781 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002782 i = j = 0;
2783 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2784 for (p = PyString_AS_STRING(self); p < e; p++)
2785 if (*p == '\t') {
2786 if (tabsize > 0)
2787 j += tabsize - (j % tabsize);
2788 }
2789 else {
2790 j++;
2791 if (*p == '\n' || *p == '\r') {
2792 i += j;
2793 j = 0;
2794 }
2795 }
2796
2797 /* Second pass: create output string and fill it */
2798 u = PyString_FromStringAndSize(NULL, i + j);
2799 if (!u)
2800 return NULL;
2801
2802 j = 0;
2803 q = PyString_AS_STRING(u);
2804
2805 for (p = PyString_AS_STRING(self); p < e; p++)
2806 if (*p == '\t') {
2807 if (tabsize > 0) {
2808 i = tabsize - (j % tabsize);
2809 j += i;
2810 while (i--)
2811 *q++ = ' ';
2812 }
2813 }
2814 else {
2815 j++;
2816 *q++ = *p;
2817 if (*p == '\n' || *p == '\r')
2818 j = 0;
2819 }
2820
2821 return u;
2822}
2823
Tim Peters8fa5dd02001-09-12 02:18:30 +00002824static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002825pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002826{
2827 PyObject *u;
2828
2829 if (left < 0)
2830 left = 0;
2831 if (right < 0)
2832 right = 0;
2833
Tim Peters8fa5dd02001-09-12 02:18:30 +00002834 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002835 Py_INCREF(self);
2836 return (PyObject *)self;
2837 }
2838
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002839 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002840 left + PyString_GET_SIZE(self) + right);
2841 if (u) {
2842 if (left)
2843 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002844 memcpy(PyString_AS_STRING(u) + left,
2845 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002846 PyString_GET_SIZE(self));
2847 if (right)
2848 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2849 fill, right);
2850 }
2851
2852 return u;
2853}
2854
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002855PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002856"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002857"\n"
2858"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002859"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002860
2861static PyObject *
2862string_ljust(PyStringObject *self, PyObject *args)
2863{
2864 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002865 char fillchar = ' ';
2866
2867 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002868 return NULL;
2869
Tim Peters8fa5dd02001-09-12 02:18:30 +00002870 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002871 Py_INCREF(self);
2872 return (PyObject*) self;
2873 }
2874
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002875 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002876}
2877
2878
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002879PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002880"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002881"\n"
2882"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002883"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002884
2885static PyObject *
2886string_rjust(PyStringObject *self, PyObject *args)
2887{
2888 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002889 char fillchar = ' ';
2890
2891 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002892 return NULL;
2893
Tim Peters8fa5dd02001-09-12 02:18:30 +00002894 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002895 Py_INCREF(self);
2896 return (PyObject*) self;
2897 }
2898
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002899 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002900}
2901
2902
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002903PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002904"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002905"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002906"Return S centered in a string of length width. Padding is\n"
2907"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002908
2909static PyObject *
2910string_center(PyStringObject *self, PyObject *args)
2911{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002912 Py_ssize_t marg, left;
2913 long width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002914 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002915
Martin v. Löwis18e16552006-02-15 17:27:45 +00002916 if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002917 return NULL;
2918
Tim Peters8fa5dd02001-09-12 02:18:30 +00002919 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002920 Py_INCREF(self);
2921 return (PyObject*) self;
2922 }
2923
2924 marg = width - PyString_GET_SIZE(self);
2925 left = marg / 2 + (marg & width & 1);
2926
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002927 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002928}
2929
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002930PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002931"S.zfill(width) -> string\n"
2932"\n"
2933"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002934"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002935
2936static PyObject *
2937string_zfill(PyStringObject *self, PyObject *args)
2938{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002939 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00002940 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002941 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002942
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002943 long width;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002944 if (!PyArg_ParseTuple(args, "l:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00002945 return NULL;
2946
2947 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002948 if (PyString_CheckExact(self)) {
2949 Py_INCREF(self);
2950 return (PyObject*) self;
2951 }
2952 else
2953 return PyString_FromStringAndSize(
2954 PyString_AS_STRING(self),
2955 PyString_GET_SIZE(self)
2956 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002957 }
2958
2959 fill = width - PyString_GET_SIZE(self);
2960
2961 s = pad(self, fill, 0, '0');
2962
2963 if (s == NULL)
2964 return NULL;
2965
2966 p = PyString_AS_STRING(s);
2967 if (p[fill] == '+' || p[fill] == '-') {
2968 /* move sign to beginning of string */
2969 p[0] = p[fill];
2970 p[fill] = '0';
2971 }
2972
2973 return (PyObject*) s;
2974}
2975
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002976PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002977"S.isspace() -> bool\n\
2978\n\
2979Return True if all characters in S are whitespace\n\
2980and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002981
2982static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002983string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002984{
Fred Drakeba096332000-07-09 07:04:36 +00002985 register const unsigned char *p
2986 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002987 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002988
Guido van Rossum4c08d552000-03-10 22:55:18 +00002989 /* Shortcut for single character strings */
2990 if (PyString_GET_SIZE(self) == 1 &&
2991 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002992 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002993
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002994 /* Special case for empty strings */
2995 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002996 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002997
Guido van Rossum4c08d552000-03-10 22:55:18 +00002998 e = p + PyString_GET_SIZE(self);
2999 for (; p < e; p++) {
3000 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003001 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003002 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003003 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003004}
3005
3006
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003007PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003008"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003009\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003010Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003011and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003012
3013static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003014string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003015{
Fred Drakeba096332000-07-09 07:04:36 +00003016 register const unsigned char *p
3017 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003018 register const unsigned char *e;
3019
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003020 /* Shortcut for single character strings */
3021 if (PyString_GET_SIZE(self) == 1 &&
3022 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003023 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003024
3025 /* Special case for empty strings */
3026 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003027 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003028
3029 e = p + PyString_GET_SIZE(self);
3030 for (; p < e; p++) {
3031 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003032 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003033 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003034 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003035}
3036
3037
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003038PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003039"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003040\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003041Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003042and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003043
3044static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003045string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003046{
Fred Drakeba096332000-07-09 07:04:36 +00003047 register const unsigned char *p
3048 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003049 register const unsigned char *e;
3050
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003051 /* Shortcut for single character strings */
3052 if (PyString_GET_SIZE(self) == 1 &&
3053 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003054 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003055
3056 /* Special case for empty strings */
3057 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003058 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003059
3060 e = p + PyString_GET_SIZE(self);
3061 for (; p < e; p++) {
3062 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003063 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003064 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003065 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003066}
3067
3068
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003069PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003070"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003072Return True if all characters in S are digits\n\
3073and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003074
3075static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003076string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003077{
Fred Drakeba096332000-07-09 07:04:36 +00003078 register const unsigned char *p
3079 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003080 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003081
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 /* Shortcut for single character strings */
3083 if (PyString_GET_SIZE(self) == 1 &&
3084 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003085 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003087 /* Special case for empty strings */
3088 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003089 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003090
Guido van Rossum4c08d552000-03-10 22:55:18 +00003091 e = p + PyString_GET_SIZE(self);
3092 for (; p < e; p++) {
3093 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003094 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003095 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003096 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003097}
3098
3099
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003100PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003101"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003102\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003103Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003104at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003105
3106static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003107string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003108{
Fred Drakeba096332000-07-09 07:04:36 +00003109 register const unsigned char *p
3110 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003111 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003112 int cased;
3113
Guido van Rossum4c08d552000-03-10 22:55:18 +00003114 /* Shortcut for single character strings */
3115 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003116 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003117
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003118 /* Special case for empty strings */
3119 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003120 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003121
Guido van Rossum4c08d552000-03-10 22:55:18 +00003122 e = p + PyString_GET_SIZE(self);
3123 cased = 0;
3124 for (; p < e; p++) {
3125 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003126 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003127 else if (!cased && islower(*p))
3128 cased = 1;
3129 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003130 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003131}
3132
3133
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003134PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003135"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003136\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003137Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003138at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003139
3140static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003141string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003142{
Fred Drakeba096332000-07-09 07:04:36 +00003143 register const unsigned char *p
3144 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003145 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003146 int cased;
3147
Guido van Rossum4c08d552000-03-10 22:55:18 +00003148 /* Shortcut for single character strings */
3149 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003150 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003151
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003152 /* Special case for empty strings */
3153 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003154 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003155
Guido van Rossum4c08d552000-03-10 22:55:18 +00003156 e = p + PyString_GET_SIZE(self);
3157 cased = 0;
3158 for (; p < e; p++) {
3159 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003160 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 else if (!cased && isupper(*p))
3162 cased = 1;
3163 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003164 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003165}
3166
3167
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003168PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003169"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003170\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003171Return True if S is a titlecased string and there is at least one\n\
3172character in S, i.e. uppercase characters may only follow uncased\n\
3173characters and lowercase characters only cased ones. Return False\n\
3174otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003175
3176static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003177string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003178{
Fred Drakeba096332000-07-09 07:04:36 +00003179 register const unsigned char *p
3180 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003181 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003182 int cased, previous_is_cased;
3183
Guido van Rossum4c08d552000-03-10 22:55:18 +00003184 /* Shortcut for single character strings */
3185 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003186 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003187
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003188 /* Special case for empty strings */
3189 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003190 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003191
Guido van Rossum4c08d552000-03-10 22:55:18 +00003192 e = p + PyString_GET_SIZE(self);
3193 cased = 0;
3194 previous_is_cased = 0;
3195 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003196 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003197
3198 if (isupper(ch)) {
3199 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003200 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003201 previous_is_cased = 1;
3202 cased = 1;
3203 }
3204 else if (islower(ch)) {
3205 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003206 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003207 previous_is_cased = 1;
3208 cased = 1;
3209 }
3210 else
3211 previous_is_cased = 0;
3212 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003213 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003214}
3215
3216
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003217PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003218"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003219\n\
3220Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003221Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003222is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003223
Guido van Rossum4c08d552000-03-10 22:55:18 +00003224static PyObject*
3225string_splitlines(PyStringObject *self, PyObject *args)
3226{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003227 register Py_ssize_t i;
3228 register Py_ssize_t j;
3229 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003230 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003231 PyObject *list;
3232 PyObject *str;
3233 char *data;
3234
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003235 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003236 return NULL;
3237
3238 data = PyString_AS_STRING(self);
3239 len = PyString_GET_SIZE(self);
3240
Guido van Rossum4c08d552000-03-10 22:55:18 +00003241 list = PyList_New(0);
3242 if (!list)
3243 goto onError;
3244
3245 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003246 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003247
Guido van Rossum4c08d552000-03-10 22:55:18 +00003248 /* Find a line and append it */
3249 while (i < len && data[i] != '\n' && data[i] != '\r')
3250 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003251
3252 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003253 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003254 if (i < len) {
3255 if (data[i] == '\r' && i + 1 < len &&
3256 data[i+1] == '\n')
3257 i += 2;
3258 else
3259 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003260 if (keepends)
3261 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003262 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003263 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003264 j = i;
3265 }
3266 if (j < len) {
3267 SPLIT_APPEND(data, j, len);
3268 }
3269
3270 return list;
3271
3272 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003273 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003274 return NULL;
3275}
3276
3277#undef SPLIT_APPEND
3278
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003279static PyObject *
3280string_getnewargs(PyStringObject *v)
3281{
3282 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3283}
3284
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003285
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003286static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003287string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003288 /* Counterparts of the obsolete stropmodule functions; except
3289 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003290 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3291 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003292 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003293 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3294 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003295 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3296 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3297 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3298 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3299 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3300 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3301 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003302 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3303 capitalize__doc__},
3304 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3305 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3306 endswith__doc__},
3307 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3308 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3309 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3310 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3311 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3312 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3313 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3314 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3315 startswith__doc__},
3316 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3317 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3318 swapcase__doc__},
3319 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3320 translate__doc__},
3321 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3322 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3323 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3324 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3325 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3326 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3327 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3328 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3329 expandtabs__doc__},
3330 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3331 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003332 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003333 {NULL, NULL} /* sentinel */
3334};
3335
Jeremy Hylton938ace62002-07-17 16:30:39 +00003336static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003337str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3338
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003339static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003340string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003341{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003342 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003343 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003344
Guido van Rossumae960af2001-08-30 03:11:59 +00003345 if (type != &PyString_Type)
3346 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003347 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3348 return NULL;
3349 if (x == NULL)
3350 return PyString_FromString("");
3351 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003352}
3353
Guido van Rossumae960af2001-08-30 03:11:59 +00003354static PyObject *
3355str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3356{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003357 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003358 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003359
3360 assert(PyType_IsSubtype(type, &PyString_Type));
3361 tmp = string_new(&PyString_Type, args, kwds);
3362 if (tmp == NULL)
3363 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003364 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003365 n = PyString_GET_SIZE(tmp);
3366 pnew = type->tp_alloc(type, n);
3367 if (pnew != NULL) {
3368 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003369 ((PyStringObject *)pnew)->ob_shash =
3370 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003371 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003372 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003373 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003374 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003375}
3376
Guido van Rossumcacfc072002-05-24 19:01:59 +00003377static PyObject *
3378basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3379{
3380 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003381 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003382 return NULL;
3383}
3384
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003385static PyObject *
3386string_mod(PyObject *v, PyObject *w)
3387{
3388 if (!PyString_Check(v)) {
3389 Py_INCREF(Py_NotImplemented);
3390 return Py_NotImplemented;
3391 }
3392 return PyString_Format(v, w);
3393}
3394
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003395PyDoc_STRVAR(basestring_doc,
3396"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003397
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003398static PyNumberMethods string_as_number = {
3399 0, /*nb_add*/
3400 0, /*nb_subtract*/
3401 0, /*nb_multiply*/
3402 0, /*nb_divide*/
3403 string_mod, /*nb_remainder*/
3404};
3405
3406
Guido van Rossumcacfc072002-05-24 19:01:59 +00003407PyTypeObject PyBaseString_Type = {
3408 PyObject_HEAD_INIT(&PyType_Type)
3409 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003410 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003411 0,
3412 0,
3413 0, /* tp_dealloc */
3414 0, /* tp_print */
3415 0, /* tp_getattr */
3416 0, /* tp_setattr */
3417 0, /* tp_compare */
3418 0, /* tp_repr */
3419 0, /* tp_as_number */
3420 0, /* tp_as_sequence */
3421 0, /* tp_as_mapping */
3422 0, /* tp_hash */
3423 0, /* tp_call */
3424 0, /* tp_str */
3425 0, /* tp_getattro */
3426 0, /* tp_setattro */
3427 0, /* tp_as_buffer */
3428 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3429 basestring_doc, /* tp_doc */
3430 0, /* tp_traverse */
3431 0, /* tp_clear */
3432 0, /* tp_richcompare */
3433 0, /* tp_weaklistoffset */
3434 0, /* tp_iter */
3435 0, /* tp_iternext */
3436 0, /* tp_methods */
3437 0, /* tp_members */
3438 0, /* tp_getset */
3439 &PyBaseObject_Type, /* tp_base */
3440 0, /* tp_dict */
3441 0, /* tp_descr_get */
3442 0, /* tp_descr_set */
3443 0, /* tp_dictoffset */
3444 0, /* tp_init */
3445 0, /* tp_alloc */
3446 basestring_new, /* tp_new */
3447 0, /* tp_free */
3448};
3449
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003450PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003451"str(object) -> string\n\
3452\n\
3453Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003454If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003455
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003456PyTypeObject PyString_Type = {
3457 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003458 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003459 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003460 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003461 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003462 (destructor)string_dealloc, /* tp_dealloc */
3463 (printfunc)string_print, /* tp_print */
3464 0, /* tp_getattr */
3465 0, /* tp_setattr */
3466 0, /* tp_compare */
3467 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003468 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003469 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003470 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003471 (hashfunc)string_hash, /* tp_hash */
3472 0, /* tp_call */
3473 (reprfunc)string_str, /* tp_str */
3474 PyObject_GenericGetAttr, /* tp_getattro */
3475 0, /* tp_setattro */
3476 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00003477 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003478 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003479 string_doc, /* tp_doc */
3480 0, /* tp_traverse */
3481 0, /* tp_clear */
3482 (richcmpfunc)string_richcompare, /* tp_richcompare */
3483 0, /* tp_weaklistoffset */
3484 0, /* tp_iter */
3485 0, /* tp_iternext */
3486 string_methods, /* tp_methods */
3487 0, /* tp_members */
3488 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003489 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003490 0, /* tp_dict */
3491 0, /* tp_descr_get */
3492 0, /* tp_descr_set */
3493 0, /* tp_dictoffset */
3494 0, /* tp_init */
3495 0, /* tp_alloc */
3496 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003497 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003498};
3499
3500void
Fred Drakeba096332000-07-09 07:04:36 +00003501PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003502{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003503 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003504 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003505 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003506 if (w == NULL || !PyString_Check(*pv)) {
3507 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003508 *pv = NULL;
3509 return;
3510 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003511 v = string_concat((PyStringObject *) *pv, w);
3512 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003513 *pv = v;
3514}
3515
Guido van Rossum013142a1994-08-30 08:19:36 +00003516void
Fred Drakeba096332000-07-09 07:04:36 +00003517PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003518{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003519 PyString_Concat(pv, w);
3520 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003521}
3522
3523
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003524/* The following function breaks the notion that strings are immutable:
3525 it changes the size of a string. We get away with this only if there
3526 is only one module referencing the object. You can also think of it
3527 as creating a new string object and destroying the old one, only
3528 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003529 already be known to some other part of the code...
3530 Note that if there's not enough memory to resize the string, the original
3531 string object at *pv is deallocated, *pv is set to NULL, an "out of
3532 memory" exception is set, and -1 is returned. Else (on success) 0 is
3533 returned, and the value in *pv may or may not be the same as on input.
3534 As always, an extra byte is allocated for a trailing \0 byte (newsize
3535 does *not* include that), and a trailing \0 byte is stored.
3536*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003537
3538int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003539_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003540{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003541 register PyObject *v;
3542 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003543 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003544 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3545 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003546 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003547 Py_DECREF(v);
3548 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003549 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003550 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003551 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003552 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003553 _Py_ForgetReference(v);
3554 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003555 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003556 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003557 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003558 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003559 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003560 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003561 _Py_NewReference(*pv);
3562 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003563 sv->ob_size = newsize;
3564 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003565 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003566 return 0;
3567}
Guido van Rossume5372401993-03-16 12:15:04 +00003568
3569/* Helpers for formatstring */
3570
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003571static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00003572getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003573{
Thomas Wouters977485d2006-02-16 15:59:12 +00003574 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003575 if (argidx < arglen) {
3576 (*p_argidx)++;
3577 if (arglen < 0)
3578 return args;
3579 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003580 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003582 PyErr_SetString(PyExc_TypeError,
3583 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003584 return NULL;
3585}
3586
/* Format flag bits.  Each bit records that the corresponding flag
 * character was seen in a conversion specification:
 *
 *   F_LJUST	'-'
 *   F_SIGN	'+'
 *   F_BLANK	' '
 *   F_ALT	'#'
 *   F_ZERO	'0'
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
3599
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003600static int
Fred Drakeba096332000-07-09 07:04:36 +00003601formatfloat(char *buf, size_t buflen, int flags,
3602 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003603{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003604 /* fmt = '%#.' + `prec` + `type`
3605 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003606 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003607 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003608 x = PyFloat_AsDouble(v);
3609 if (x == -1.0 && PyErr_Occurred()) {
3610 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003611 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003612 }
Guido van Rossume5372401993-03-16 12:15:04 +00003613 if (prec < 0)
3614 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003615 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3616 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003617 /* Worst case length calc to ensure no buffer overrun:
3618
3619 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003620 fmt = %#.<prec>g
3621 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003622 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003623 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003624
3625 'f' formats:
3626 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3627 len = 1 + 50 + 1 + prec = 52 + prec
3628
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003629 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00003630 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003631
3632 */
3633 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3634 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003635 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003636 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003637 return -1;
3638 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003639 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3640 (flags&F_ALT) ? "#" : "",
3641 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003642 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003643 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003644}
3645
Tim Peters38fd5b62000-09-21 05:43:11 +00003646/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3647 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3648 * Python's regular ints.
3649 * Return value: a new PyString*, or NULL if error.
3650 * . *pbuf is set to point into it,
3651 * *plen set to the # of chars following that.
3652 * Caller must decref it when done using pbuf.
3653 * The string starting at *pbuf is of the form
3654 * "-"? ("0x" | "0X")? digit+
3655 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003656 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003657 * There will be at least prec digits, zero-filled on the left if
3658 * necessary to get that many.
3659 * val object to be converted
3660 * flags bitmask of format flags; only F_ALT is looked at
3661 * prec minimum number of digits; 0-fill on left if needed
3662 * type a character in [duoxX]; u acts the same as d
3663 *
3664 * CAUTION: o, x and X conversions on regular ints can never
3665 * produce a '-' sign, but can for Python's unbounded ints.
3666 */
3667PyObject*
3668_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3669 char **pbuf, int *plen)
3670{
3671 PyObject *result = NULL;
3672 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003673 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003674 int sign; /* 1 if '-', else 0 */
3675 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003676 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003677 int numdigits; /* len == numnondigits + numdigits */
3678 int numnondigits = 0;
3679
3680 switch (type) {
3681 case 'd':
3682 case 'u':
3683 result = val->ob_type->tp_str(val);
3684 break;
3685 case 'o':
3686 result = val->ob_type->tp_as_number->nb_oct(val);
3687 break;
3688 case 'x':
3689 case 'X':
3690 numnondigits = 2;
3691 result = val->ob_type->tp_as_number->nb_hex(val);
3692 break;
3693 default:
3694 assert(!"'type' not in [duoxX]");
3695 }
3696 if (!result)
3697 return NULL;
3698
3699 /* To modify the string in-place, there can only be one reference. */
3700 if (result->ob_refcnt != 1) {
3701 PyErr_BadInternalCall();
3702 return NULL;
3703 }
3704 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00003705 llen = PyString_Size(result);
3706 if (llen > INT_MAX) {
3707 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
3708 return NULL;
3709 }
3710 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003711 if (buf[len-1] == 'L') {
3712 --len;
3713 buf[len] = '\0';
3714 }
3715 sign = buf[0] == '-';
3716 numnondigits += sign;
3717 numdigits = len - numnondigits;
3718 assert(numdigits > 0);
3719
Tim Petersfff53252001-04-12 18:38:48 +00003720 /* Get rid of base marker unless F_ALT */
3721 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003722 /* Need to skip 0x, 0X or 0. */
3723 int skipped = 0;
3724 switch (type) {
3725 case 'o':
3726 assert(buf[sign] == '0');
3727 /* If 0 is only digit, leave it alone. */
3728 if (numdigits > 1) {
3729 skipped = 1;
3730 --numdigits;
3731 }
3732 break;
3733 case 'x':
3734 case 'X':
3735 assert(buf[sign] == '0');
3736 assert(buf[sign + 1] == 'x');
3737 skipped = 2;
3738 numnondigits -= 2;
3739 break;
3740 }
3741 if (skipped) {
3742 buf += skipped;
3743 len -= skipped;
3744 if (sign)
3745 buf[0] = '-';
3746 }
3747 assert(len == numnondigits + numdigits);
3748 assert(numdigits > 0);
3749 }
3750
3751 /* Fill with leading zeroes to meet minimum width. */
3752 if (prec > numdigits) {
3753 PyObject *r1 = PyString_FromStringAndSize(NULL,
3754 numnondigits + prec);
3755 char *b1;
3756 if (!r1) {
3757 Py_DECREF(result);
3758 return NULL;
3759 }
3760 b1 = PyString_AS_STRING(r1);
3761 for (i = 0; i < numnondigits; ++i)
3762 *b1++ = *buf++;
3763 for (i = 0; i < prec - numdigits; i++)
3764 *b1++ = '0';
3765 for (i = 0; i < numdigits; i++)
3766 *b1++ = *buf++;
3767 *b1 = '\0';
3768 Py_DECREF(result);
3769 result = r1;
3770 buf = PyString_AS_STRING(result);
3771 len = numnondigits + prec;
3772 }
3773
3774 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003775 if (type == 'X') {
3776 /* Need to convert all lower case letters to upper case.
3777 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003778 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003779 if (buf[i] >= 'a' && buf[i] <= 'x')
3780 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003781 }
3782 *pbuf = buf;
3783 *plen = len;
3784 return result;
3785}
3786
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003787static int
Fred Drakeba096332000-07-09 07:04:36 +00003788formatint(char *buf, size_t buflen, int flags,
3789 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003790{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003791 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003792 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3793 + 1 + 1 = 24 */
3794 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003795 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003796 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003797
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003798 x = PyInt_AsLong(v);
3799 if (x == -1 && PyErr_Occurred()) {
3800 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003801 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003802 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003803 if (x < 0 && type == 'u') {
3804 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003805 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003806 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3807 sign = "-";
3808 else
3809 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003810 if (prec < 0)
3811 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003812
3813 if ((flags & F_ALT) &&
3814 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003815 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003816 * of issues that cause pain:
3817 * - when 0 is being converted, the C standard leaves off
3818 * the '0x' or '0X', which is inconsistent with other
3819 * %#x/%#X conversions and inconsistent with Python's
3820 * hex() function
3821 * - there are platforms that violate the standard and
3822 * convert 0 with the '0x' or '0X'
3823 * (Metrowerks, Compaq Tru64)
3824 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003825 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003826 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003827 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003828 * We can achieve the desired consistency by inserting our
3829 * own '0x' or '0X' prefix, and substituting %x/%X in place
3830 * of %#x/%#X.
3831 *
3832 * Note that this is the same approach as used in
3833 * formatint() in unicodeobject.c
3834 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003835 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3836 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003837 }
3838 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003839 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3840 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003841 prec, type);
3842 }
3843
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003844 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3845 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003846 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003847 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003848 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003849 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003850 return -1;
3851 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003852 if (sign[0])
3853 PyOS_snprintf(buf, buflen, fmt, -x);
3854 else
3855 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003856 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003857}
3858
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003859static int
Fred Drakeba096332000-07-09 07:04:36 +00003860formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003861{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003862 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003863 if (PyString_Check(v)) {
3864 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003865 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003866 }
3867 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003868 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003869 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003870 }
3871 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003872 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003873}
3874
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast binds as a unit
   wherever the macro is used in an expression.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003884
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003885PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003886PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003887{
3888 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003889 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003890 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003891 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003892 PyObject *result, *orig_args;
3893#ifdef Py_USING_UNICODE
3894 PyObject *v, *w;
3895#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003896 PyObject *dict = NULL;
3897 if (format == NULL || !PyString_Check(format) || args == NULL) {
3898 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003899 return NULL;
3900 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003901 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003902 fmt = PyString_AS_STRING(format);
3903 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003904 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003905 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003906 if (result == NULL)
3907 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003908 res = PyString_AsString(result);
3909 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003910 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003911 argidx = 0;
3912 }
3913 else {
3914 arglen = -1;
3915 argidx = -2;
3916 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003917 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3918 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003919 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003920 while (--fmtcnt >= 0) {
3921 if (*fmt != '%') {
3922 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003923 rescnt = fmtcnt + 100;
3924 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003925 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003926 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003927 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003928 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003929 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003930 }
3931 *res++ = *fmt++;
3932 }
3933 else {
3934 /* Got a format specifier */
3935 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003936 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003937 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003938 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003939 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003940 PyObject *v = NULL;
3941 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003942 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003943 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003944 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003945 char formatbuf[FORMATBUFLEN];
3946 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003947#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003948 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003949 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003950#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003951
Guido van Rossumda9c2711996-12-05 21:58:58 +00003952 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003953 if (*fmt == '(') {
3954 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003955 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003956 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003957 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003958
3959 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003960 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003961 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003962 goto error;
3963 }
3964 ++fmt;
3965 --fmtcnt;
3966 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003967 /* Skip over balanced parentheses */
3968 while (pcount > 0 && --fmtcnt >= 0) {
3969 if (*fmt == ')')
3970 --pcount;
3971 else if (*fmt == '(')
3972 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003973 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003974 }
3975 keylen = fmt - keystart - 1;
3976 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003977 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003978 "incomplete format key");
3979 goto error;
3980 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003981 key = PyString_FromStringAndSize(keystart,
3982 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003983 if (key == NULL)
3984 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003985 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003986 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003987 args_owned = 0;
3988 }
3989 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003990 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003991 if (args == NULL) {
3992 goto error;
3993 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003994 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003995 arglen = -1;
3996 argidx = -2;
3997 }
Guido van Rossume5372401993-03-16 12:15:04 +00003998 while (--fmtcnt >= 0) {
3999 switch (c = *fmt++) {
4000 case '-': flags |= F_LJUST; continue;
4001 case '+': flags |= F_SIGN; continue;
4002 case ' ': flags |= F_BLANK; continue;
4003 case '#': flags |= F_ALT; continue;
4004 case '0': flags |= F_ZERO; continue;
4005 }
4006 break;
4007 }
4008 if (c == '*') {
4009 v = getnextarg(args, arglen, &argidx);
4010 if (v == NULL)
4011 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004012 if (!PyInt_Check(v)) {
4013 PyErr_SetString(PyExc_TypeError,
4014 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004015 goto error;
4016 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004017 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004018 if (width < 0) {
4019 flags |= F_LJUST;
4020 width = -width;
4021 }
Guido van Rossume5372401993-03-16 12:15:04 +00004022 if (--fmtcnt >= 0)
4023 c = *fmt++;
4024 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004025 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004026 width = c - '0';
4027 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004028 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004029 if (!isdigit(c))
4030 break;
4031 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004032 PyErr_SetString(
4033 PyExc_ValueError,
4034 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004035 goto error;
4036 }
4037 width = width*10 + (c - '0');
4038 }
4039 }
4040 if (c == '.') {
4041 prec = 0;
4042 if (--fmtcnt >= 0)
4043 c = *fmt++;
4044 if (c == '*') {
4045 v = getnextarg(args, arglen, &argidx);
4046 if (v == NULL)
4047 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004048 if (!PyInt_Check(v)) {
4049 PyErr_SetString(
4050 PyExc_TypeError,
4051 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004052 goto error;
4053 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004054 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004055 if (prec < 0)
4056 prec = 0;
4057 if (--fmtcnt >= 0)
4058 c = *fmt++;
4059 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004060 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004061 prec = c - '0';
4062 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004063 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004064 if (!isdigit(c))
4065 break;
4066 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004067 PyErr_SetString(
4068 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004069 "prec too big");
4070 goto error;
4071 }
4072 prec = prec*10 + (c - '0');
4073 }
4074 }
4075 } /* prec */
4076 if (fmtcnt >= 0) {
4077 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004078 if (--fmtcnt >= 0)
4079 c = *fmt++;
4080 }
4081 }
4082 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004083 PyErr_SetString(PyExc_ValueError,
4084 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004085 goto error;
4086 }
4087 if (c != '%') {
4088 v = getnextarg(args, arglen, &argidx);
4089 if (v == NULL)
4090 goto error;
4091 }
4092 sign = 0;
4093 fill = ' ';
4094 switch (c) {
4095 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004096 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004097 len = 1;
4098 break;
4099 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004100#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004101 if (PyUnicode_Check(v)) {
4102 fmt = fmt_start;
4103 argidx = argidx_start;
4104 goto unicode;
4105 }
Georg Brandld45014b2005-10-01 17:06:00 +00004106#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004107 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004108#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004109 if (temp != NULL && PyUnicode_Check(temp)) {
4110 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004111 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004112 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004113 goto unicode;
4114 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004115#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004116 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004117 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004118 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004119 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004120 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004121 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004122 if (!PyString_Check(temp)) {
4123 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004124 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004125 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004126 goto error;
4127 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004128 pbuf = PyString_AS_STRING(temp);
4129 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004130 if (prec >= 0 && len > prec)
4131 len = prec;
4132 break;
4133 case 'i':
4134 case 'd':
4135 case 'u':
4136 case 'o':
4137 case 'x':
4138 case 'X':
4139 if (c == 'i')
4140 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004141 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004142 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004143 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004144 prec, c, &pbuf, &ilen);
4145 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004146 if (!temp)
4147 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004148 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004149 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004150 else {
4151 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004152 len = formatint(pbuf,
4153 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004154 flags, prec, c, v);
4155 if (len < 0)
4156 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004157 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004158 }
4159 if (flags & F_ZERO)
4160 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004161 break;
4162 case 'e':
4163 case 'E':
4164 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004165 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004166 case 'g':
4167 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004168 if (c == 'F')
4169 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004170 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004171 len = formatfloat(pbuf, sizeof(formatbuf),
4172 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004173 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004174 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004175 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004176 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004177 fill = '0';
4178 break;
4179 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004180#ifdef Py_USING_UNICODE
4181 if (PyUnicode_Check(v)) {
4182 fmt = fmt_start;
4183 argidx = argidx_start;
4184 goto unicode;
4185 }
4186#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004187 pbuf = formatbuf;
4188 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004189 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004190 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004191 break;
4192 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004193 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004194 "unsupported format character '%c' (0x%x) "
4195 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004196 c, c,
4197 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004198 goto error;
4199 }
4200 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004201 if (*pbuf == '-' || *pbuf == '+') {
4202 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004203 len--;
4204 }
4205 else if (flags & F_SIGN)
4206 sign = '+';
4207 else if (flags & F_BLANK)
4208 sign = ' ';
4209 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004210 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004211 }
4212 if (width < len)
4213 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004214 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004215 reslen -= rescnt;
4216 rescnt = width + fmtcnt + 100;
4217 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004218 if (reslen < 0) {
4219 Py_DECREF(result);
4220 return PyErr_NoMemory();
4221 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004222 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004223 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004224 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004225 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004226 }
4227 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004228 if (fill != ' ')
4229 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004230 rescnt--;
4231 if (width > len)
4232 width--;
4233 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004234 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4235 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004236 assert(pbuf[1] == c);
4237 if (fill != ' ') {
4238 *res++ = *pbuf++;
4239 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004240 }
Tim Petersfff53252001-04-12 18:38:48 +00004241 rescnt -= 2;
4242 width -= 2;
4243 if (width < 0)
4244 width = 0;
4245 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004246 }
4247 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004248 do {
4249 --rescnt;
4250 *res++ = fill;
4251 } while (--width > len);
4252 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004253 if (fill == ' ') {
4254 if (sign)
4255 *res++ = sign;
4256 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004257 (c == 'x' || c == 'X')) {
4258 assert(pbuf[0] == '0');
4259 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004260 *res++ = *pbuf++;
4261 *res++ = *pbuf++;
4262 }
4263 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004264 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004265 res += len;
4266 rescnt -= len;
4267 while (--width >= len) {
4268 --rescnt;
4269 *res++ = ' ';
4270 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004271 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004272 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004273 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004274 goto error;
4275 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004276 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004277 } /* '%' */
4278 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004279 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004280 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004281 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004282 goto error;
4283 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004284 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004285 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004286 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004287 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004288 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004289
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004290#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004291 unicode:
4292 if (args_owned) {
4293 Py_DECREF(args);
4294 args_owned = 0;
4295 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004296 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004297 if (PyTuple_Check(orig_args) && argidx > 0) {
4298 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004299 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004300 v = PyTuple_New(n);
4301 if (v == NULL)
4302 goto error;
4303 while (--n >= 0) {
4304 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4305 Py_INCREF(w);
4306 PyTuple_SET_ITEM(v, n, w);
4307 }
4308 args = v;
4309 } else {
4310 Py_INCREF(orig_args);
4311 args = orig_args;
4312 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004313 args_owned = 1;
4314 /* Take what we have of the result and let the Unicode formatting
4315 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004316 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004317 if (_PyString_Resize(&result, rescnt))
4318 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004319 fmtcnt = PyString_GET_SIZE(format) - \
4320 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004321 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4322 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004323 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004324 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004325 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004326 if (v == NULL)
4327 goto error;
4328 /* Paste what we have (result) to what the Unicode formatting
4329 function returned (v) and return the result (or error) */
4330 w = PyUnicode_Concat(result, v);
4331 Py_DECREF(result);
4332 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004333 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004334 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004335#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004336
Guido van Rossume5372401993-03-16 12:15:04 +00004337 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004338 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004339 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004340 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004341 }
Guido van Rossume5372401993-03-16 12:15:04 +00004342 return NULL;
4343}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004344
Guido van Rossum2a61e741997-01-18 07:55:05 +00004345void
Fred Drakeba096332000-07-09 07:04:36 +00004346PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004347{
4348 register PyStringObject *s = (PyStringObject *)(*p);
4349 PyObject *t;
4350 if (s == NULL || !PyString_Check(s))
4351 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004352 /* If it's a string subclass, we don't really know what putting
4353 it in the interned dict might do. */
4354 if (!PyString_CheckExact(s))
4355 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004356 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004357 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004358 if (interned == NULL) {
4359 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004360 if (interned == NULL) {
4361 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004362 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004363 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004364 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004365 t = PyDict_GetItem(interned, (PyObject *)s);
4366 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004367 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004368 Py_DECREF(*p);
4369 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004370 return;
4371 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004372
Armin Rigo79f7ad22004-08-07 19:27:39 +00004373 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004374 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004375 return;
4376 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004377 /* The two references in interned are not counted by refcnt.
4378 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004379 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004380 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004381}
4382
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004383void
4384PyString_InternImmortal(PyObject **p)
4385{
4386 PyString_InternInPlace(p);
4387 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4388 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4389 Py_INCREF(*p);
4390 }
4391}
4392
Guido van Rossum2a61e741997-01-18 07:55:05 +00004393
4394PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004395PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004396{
4397 PyObject *s = PyString_FromString(cp);
4398 if (s == NULL)
4399 return NULL;
4400 PyString_InternInPlace(&s);
4401 return s;
4402}
4403
Guido van Rossum8cf04761997-08-02 02:57:45 +00004404void
Fred Drakeba096332000-07-09 07:04:36 +00004405PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004406{
4407 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004408 for (i = 0; i < UCHAR_MAX + 1; i++) {
4409 Py_XDECREF(characters[i]);
4410 characters[i] = NULL;
4411 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004412 Py_XDECREF(nullstring);
4413 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004414}
Barry Warsawa903ad982001-02-23 16:40:48 +00004415
Barry Warsawa903ad982001-02-23 16:40:48 +00004416void _Py_ReleaseInternedStrings(void)
4417{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004418 PyObject *keys;
4419 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004420 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004421
4422 if (interned == NULL || !PyDict_Check(interned))
4423 return;
4424 keys = PyDict_Keys(interned);
4425 if (keys == NULL || !PyList_Check(keys)) {
4426 PyErr_Clear();
4427 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004428 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004429
4430 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4431 detector, interned strings are not forcibly deallocated; rather, we
4432 give them their stolen references back, and then clear and DECREF
4433 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004434
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004435 fprintf(stderr, "releasing interned strings\n");
4436 n = PyList_GET_SIZE(keys);
4437 for (i = 0; i < n; i++) {
4438 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4439 switch (s->ob_sstate) {
4440 case SSTATE_NOT_INTERNED:
4441 /* XXX Shouldn't happen */
4442 break;
4443 case SSTATE_INTERNED_IMMORTAL:
4444 s->ob_refcnt += 1;
4445 break;
4446 case SSTATE_INTERNED_MORTAL:
4447 s->ob_refcnt += 2;
4448 break;
4449 default:
4450 Py_FatalError("Inconsistent interned string state.");
4451 }
4452 s->ob_sstate = SSTATE_NOT_INTERNED;
4453 }
4454 Py_DECREF(keys);
4455 PyDict_Clear(interned);
4456 Py_DECREF(interned);
4457 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004458}