blob: b0c4640a0cb252a05bd877071acc1dc83cca421e [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
/* Counters bumped (only when COUNT_ALLOCS is defined) each time one of the
   cached strings below is handed out instead of allocating a new object. */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
/* Cache of one-character string objects, indexed by the byte value
   (`*str & UCHAR_MAX'); entries are filled in lazily by the constructors. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *characters[UCHAR_MAX + 1];
/* Cached empty string object; NULL until the first empty string is built. */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
23static PyObject *interned;
24
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
82 memcpy(op->ob_sval, str, size);
83 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000136 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
164 memcpy(count, vargs, sizeof(va_list));
165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
306 memcpy(s, p, i);
307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000585 memcpy(p, r, rn);
586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Martin v. Löwis18e16552006-02-15 17:27:45 +0000692static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000693string_getsize(register PyObject *op)
694{
695 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000696 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697 if (PyString_AsStringAndSize(op, &s, &len))
698 return -1;
699 return len;
700}
701
702static /*const*/ char *
703string_getbuffer(register PyObject *op)
704{
705 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000706 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (PyString_AsStringAndSize(op, &s, &len))
708 return NULL;
709 return s;
710}
711
Martin v. Löwis18e16552006-02-15 17:27:45 +0000712Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000713PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000714{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000715 if (!PyString_Check(op))
716 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000717 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718}
719
720/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000721PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000723 if (!PyString_Check(op))
724 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000725 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000726}
727
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728int
729PyString_AsStringAndSize(register PyObject *obj,
730 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000731 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000732{
733 if (s == NULL) {
734 PyErr_BadInternalCall();
735 return -1;
736 }
737
738 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000739#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000740 if (PyUnicode_Check(obj)) {
741 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
742 if (obj == NULL)
743 return -1;
744 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000745 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000746#endif
747 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000748 PyErr_Format(PyExc_TypeError,
749 "expected string or Unicode object, "
750 "%.200s found", obj->ob_type->tp_name);
751 return -1;
752 }
753 }
754
755 *s = PyString_AS_STRING(obj);
756 if (len != NULL)
757 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000758 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000759 PyErr_SetString(PyExc_TypeError,
760 "expected string without null bytes");
761 return -1;
762 }
763 return 0;
764}
765
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000766/* Methods */
767
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000768static int
Fred Drakeba096332000-07-09 07:04:36 +0000769string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000770{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000771 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000772 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000773 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000774
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000775 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000776 if (! PyString_CheckExact(op)) {
777 int ret;
778 /* A str subclass may have its own __str__ method. */
779 op = (PyStringObject *) PyObject_Str((PyObject *)op);
780 if (op == NULL)
781 return -1;
782 ret = string_print(op, fp, flags);
783 Py_DECREF(op);
784 return ret;
785 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000786 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000787#ifdef __VMS
788 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
789#else
790 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
791#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000792 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794
Thomas Wouters7e474022000-07-16 12:04:32 +0000795 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000796 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000797 if (memchr(op->ob_sval, '\'', op->ob_size) &&
798 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000799 quote = '"';
800
801 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000802 for (i = 0; i < op->ob_size; i++) {
803 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000804 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000805 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000806 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000807 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000808 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000809 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000810 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000811 fprintf(fp, "\\r");
812 else if (c < ' ' || c >= 0x7f)
813 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000814 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000815 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000816 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000817 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000818 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000819}
820
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000821PyObject *
822PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000823{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000824 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000825 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000826 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000827 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000828 PyErr_SetString(PyExc_OverflowError,
829 "string is too large to make repr");
830 }
831 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000833 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 }
835 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000836 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000837 register char c;
838 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 int quote;
840
Thomas Wouters7e474022000-07-16 12:04:32 +0000841 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000842 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000843 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000844 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000845 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000846 quote = '"';
847
Tim Peters9161c8b2001-12-03 01:55:38 +0000848 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000850 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000851 /* There's at least enough room for a hex escape
852 and a closing quote. */
853 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000855 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000856 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000857 else if (c == '\t')
858 *p++ = '\\', *p++ = 't';
859 else if (c == '\n')
860 *p++ = '\\', *p++ = 'n';
861 else if (c == '\r')
862 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000863 else if (c < ' ' || c >= 0x7f) {
864 /* For performance, we don't want to call
865 PyOS_snprintf here (extra layers of
866 function call). */
867 sprintf(p, "\\x%02x", c & 0xff);
868 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000869 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000870 else
871 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000872 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000873 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000876 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000877 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000878 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000879 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880}
881
Guido van Rossum189f1df2001-05-01 16:51:53 +0000882static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000883string_repr(PyObject *op)
884{
885 return PyString_Repr(op, 1);
886}
887
888static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000889string_str(PyObject *s)
890{
Tim Petersc9933152001-10-16 20:18:24 +0000891 assert(PyString_Check(s));
892 if (PyString_CheckExact(s)) {
893 Py_INCREF(s);
894 return s;
895 }
896 else {
897 /* Subtype -- return genuine string with the same value. */
898 PyStringObject *t = (PyStringObject *) s;
899 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
900 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000901}
902
Martin v. Löwis18e16552006-02-15 17:27:45 +0000903static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000904string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000905{
906 return a->ob_size;
907}
908
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000909static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000910string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000912 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000913 register PyStringObject *op;
914 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000915#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000916 if (PyUnicode_Check(bb))
917 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000918#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000919 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000920 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000921 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922 return NULL;
923 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000924#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000926 if ((a->ob_size == 0 || b->ob_size == 0) &&
927 PyString_CheckExact(a) && PyString_CheckExact(b)) {
928 if (a->ob_size == 0) {
929 Py_INCREF(bb);
930 return bb;
931 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000932 Py_INCREF(a);
933 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000934 }
935 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000936 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000937 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000938 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000939 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000940 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000941 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000942 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000943 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000944 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
945 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000946 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948#undef b
949}
950
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000951static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000952string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000953{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000954 register Py_ssize_t i;
955 register Py_ssize_t j;
956 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000957 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000958 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000959 if (n < 0)
960 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000961 /* watch out for overflows: the size can overflow int,
962 * and the # of bytes needed can overflow size_t
963 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000964 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000965 if (n && size / n != a->ob_size) {
966 PyErr_SetString(PyExc_OverflowError,
967 "repeated string is too long");
968 return NULL;
969 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000970 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000971 Py_INCREF(a);
972 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973 }
Tim Peterse7c05322004-06-27 17:24:49 +0000974 nbytes = (size_t)size;
975 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000976 PyErr_SetString(PyExc_OverflowError,
977 "repeated string is too long");
978 return NULL;
979 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000980 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000981 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000982 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000984 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000985 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000986 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000987 op->ob_sval[size] = '\0';
988 if (a->ob_size == 1 && n > 0) {
989 memset(op->ob_sval, a->ob_sval[0] , n);
990 return (PyObject *) op;
991 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000992 i = 0;
993 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000994 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
995 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000996 }
997 while (i < size) {
998 j = (i <= size-i) ? i : size-i;
999 memcpy(op->ob_sval+i, op->ob_sval, j);
1000 i += j;
1001 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001002 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003}
1004
1005/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1006
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001007static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001008string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001009 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001010 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001011{
1012 if (i < 0)
1013 i = 0;
1014 if (j < 0)
1015 j = 0; /* Avoid signed/unsigned bug in next line */
1016 if (j > a->ob_size)
1017 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001018 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1019 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001020 Py_INCREF(a);
1021 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001022 }
1023 if (j < i)
1024 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001025 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001026}
1027
Guido van Rossum9284a572000-03-07 15:53:43 +00001028static int
Fred Drakeba096332000-07-09 07:04:36 +00001029string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001030{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001031 char *s = PyString_AS_STRING(a);
1032 const char *sub = PyString_AS_STRING(el);
1033 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001035 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001036 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001037
1038 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001039#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001040 if (PyUnicode_Check(el))
1041 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001042#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001043 if (!PyString_Check(el)) {
1044 PyErr_SetString(PyExc_TypeError,
1045 "'in <string>' requires string as left operand");
1046 return -1;
1047 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001048 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001049
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001050 if (len_sub == 0)
1051 return 1;
Tim Petersae1d0c92006-03-17 03:29:34 +00001052 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001053 substring. When s<last, there is still room for a possible match
1054 and s[0] through s[len_sub-1] will be in bounds.
1055 shortsub is len_sub minus the last character which is checked
1056 separately just before the memcmp(). That check helps prevent
1057 false starts and saves the setup time for memcmp().
1058 */
1059 firstchar = sub[0];
1060 shortsub = len_sub - 1;
1061 lastchar = sub[shortsub];
1062 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1063 while (s < last) {
Anthony Baxtera6286212006-04-11 07:42:36 +00001064 s = (char *)memchr(s, firstchar, last-s);
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001065 if (s == NULL)
1066 return 0;
1067 assert(s < last);
1068 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001069 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001070 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001071 }
1072 return 0;
1073}
1074
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001075static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001076string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001077{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001078 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001079 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001080 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001081 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001082 return NULL;
1083 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001084 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001085 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001086 if (v == NULL)
1087 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001088 else {
1089#ifdef COUNT_ALLOCS
1090 one_strings++;
1091#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001092 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001093 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001094 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001095}
1096
Martin v. Löwiscd353062001-05-24 16:56:35 +00001097static PyObject*
1098string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001099{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001101 Py_ssize_t len_a, len_b;
1102 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001103 PyObject *result;
1104
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001105 /* Make sure both arguments are strings. */
1106 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001107 result = Py_NotImplemented;
1108 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001109 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001110 if (a == b) {
1111 switch (op) {
1112 case Py_EQ:case Py_LE:case Py_GE:
1113 result = Py_True;
1114 goto out;
1115 case Py_NE:case Py_LT:case Py_GT:
1116 result = Py_False;
1117 goto out;
1118 }
1119 }
1120 if (op == Py_EQ) {
1121 /* Supporting Py_NE here as well does not save
1122 much time, since Py_NE is rarely used. */
1123 if (a->ob_size == b->ob_size
1124 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001125 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001126 a->ob_size) == 0)) {
1127 result = Py_True;
1128 } else {
1129 result = Py_False;
1130 }
1131 goto out;
1132 }
1133 len_a = a->ob_size; len_b = b->ob_size;
1134 min_len = (len_a < len_b) ? len_a : len_b;
1135 if (min_len > 0) {
1136 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1137 if (c==0)
1138 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1139 }else
1140 c = 0;
1141 if (c == 0)
1142 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1143 switch (op) {
1144 case Py_LT: c = c < 0; break;
1145 case Py_LE: c = c <= 0; break;
1146 case Py_EQ: assert(0); break; /* unreachable */
1147 case Py_NE: c = c != 0; break;
1148 case Py_GT: c = c > 0; break;
1149 case Py_GE: c = c >= 0; break;
1150 default:
1151 result = Py_NotImplemented;
1152 goto out;
1153 }
1154 result = c ? Py_True : Py_False;
1155 out:
1156 Py_INCREF(result);
1157 return result;
1158}
1159
1160int
1161_PyString_Eq(PyObject *o1, PyObject *o2)
1162{
1163 PyStringObject *a, *b;
1164 a = (PyStringObject*)o1;
1165 b = (PyStringObject*)o2;
1166 return a->ob_size == b->ob_size
1167 && *a->ob_sval == *b->ob_sval
1168 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001169}
1170
Guido van Rossum9bfef441993-03-29 10:43:31 +00001171static long
Fred Drakeba096332000-07-09 07:04:36 +00001172string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001173{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001174 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001175 register unsigned char *p;
1176 register long x;
1177
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001178 if (a->ob_shash != -1)
1179 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001180 len = a->ob_size;
1181 p = (unsigned char *) a->ob_sval;
1182 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001183 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001184 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001185 x ^= a->ob_size;
1186 if (x == -1)
1187 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001188 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189 return x;
1190}
1191
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001192#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1193
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001194static PyObject*
1195string_subscript(PyStringObject* self, PyObject* item)
1196{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001197 PyNumberMethods *nb = item->ob_type->tp_as_number;
1198 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1199 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001200 if (i == -1 && PyErr_Occurred())
1201 return NULL;
1202 if (i < 0)
1203 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001204 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001205 }
1206 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001207 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001208 char* source_buf;
1209 char* result_buf;
1210 PyObject* result;
1211
Tim Petersae1d0c92006-03-17 03:29:34 +00001212 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001213 PyString_GET_SIZE(self),
1214 &start, &stop, &step, &slicelength) < 0) {
1215 return NULL;
1216 }
1217
1218 if (slicelength <= 0) {
1219 return PyString_FromStringAndSize("", 0);
1220 }
1221 else {
1222 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001223 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001224 if (result_buf == NULL)
1225 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001226
Tim Petersae1d0c92006-03-17 03:29:34 +00001227 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 cur += step, i++) {
1229 result_buf[i] = source_buf[cur];
1230 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001231
1232 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001233 slicelength);
1234 PyMem_Free(result_buf);
1235 return result;
1236 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001237 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001238 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001239 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001240 "string indices must be integers");
1241 return NULL;
1242 }
1243}
1244
Martin v. Löwis18e16552006-02-15 17:27:45 +00001245static Py_ssize_t
1246string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001247{
1248 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001249 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001250 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001251 return -1;
1252 }
1253 *ptr = (void *)self->ob_sval;
1254 return self->ob_size;
1255}
1256
Martin v. Löwis18e16552006-02-15 17:27:45 +00001257static Py_ssize_t
1258string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001259{
Guido van Rossum045e6881997-09-08 18:30:11 +00001260 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001261 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001262 return -1;
1263}
1264
Martin v. Löwis18e16552006-02-15 17:27:45 +00001265static Py_ssize_t
1266string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001267{
1268 if ( lenp )
1269 *lenp = self->ob_size;
1270 return 1;
1271}
1272
Martin v. Löwis18e16552006-02-15 17:27:45 +00001273static Py_ssize_t
1274string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001275{
1276 if ( index != 0 ) {
1277 PyErr_SetString(PyExc_SystemError,
1278 "accessing non-existent string segment");
1279 return -1;
1280 }
1281 *ptr = self->ob_sval;
1282 return self->ob_size;
1283}
1284
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001285static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001286 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001287 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001288 (ssizeargfunc)string_repeat, /*sq_repeat*/
1289 (ssizeargfunc)string_item, /*sq_item*/
1290 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001291 0, /*sq_ass_item*/
1292 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001293 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001294};
1295
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001296static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001297 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001298 (binaryfunc)string_subscript,
1299 0,
1300};
1301
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001302static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001303 (readbufferproc)string_buffer_getreadbuf,
1304 (writebufferproc)string_buffer_getwritebuf,
1305 (segcountproc)string_buffer_getsegcount,
1306 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001307};
1308
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001309
1310
/* Strip modes; each value is also an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string minus its
   three-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001319
/* Append the substring data[left:right] to the end of `list`.
 *
 * Uses the expanding function's `str` and `list` variables and jumps to
 * its `onError` label if creating the substring or appending it fails.
 * The temporary reference held in `str` is always released.  Expands to
 * more than one statement, so use it only as a complete statement inside
 * a braced block.
 */
#define SPLIT_APPEND(data, left, right)                                 \
    str = PyString_FromStringAndSize((data) + (left),                   \
                                     (right) - (left));                 \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    {                                                                   \
        int split_append_failed_ = PyList_Append(list, str);            \
        Py_DECREF(str);                                                 \
        if (split_append_failed_)                                       \
            goto onError;                                               \
    }
1331
/* Insert the substring data[left:right] at the front of `list`.
 *
 * Uses the expanding function's `str` and `list` variables and jumps to
 * its `onError` label if creating the substring or inserting it fails.
 * The temporary reference held in `str` is always released.  Expands to
 * more than one statement, so use it only as a complete statement inside
 * a braced block.
 */
#define SPLIT_INSERT(data, left, right)                                 \
    str = PyString_FromStringAndSize((data) + (left),                   \
                                     (right) - (left));                 \
    if (str == NULL)                                                    \
        goto onError;                                                   \
    {                                                                   \
        int split_insert_failed_ = PyList_Insert(list, 0, str);         \
        Py_DECREF(str);                                                 \
        if (split_insert_failed_)                                       \
            goto onError;                                               \
    }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001343
1344static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001345split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001347 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001348 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 PyObject *list = PyList_New(0);
1350
1351 if (list == NULL)
1352 return NULL;
1353
Guido van Rossum4c08d552000-03-10 22:55:18 +00001354 for (i = j = 0; i < len; ) {
1355 while (i < len && isspace(Py_CHARMASK(s[i])))
1356 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001357 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001358 while (i < len && !isspace(Py_CHARMASK(s[i])))
1359 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361 if (maxsplit-- <= 0)
1362 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001363 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001364 while (i < len && isspace(Py_CHARMASK(s[i])))
1365 i++;
1366 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 }
1368 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001369 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001370 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001371 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001373 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374 Py_DECREF(list);
1375 return NULL;
1376}
1377
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001378static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001379split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001380{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001381 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001382 PyObject *str;
1383 PyObject *list = PyList_New(0);
1384
1385 if (list == NULL)
1386 return NULL;
1387
1388 for (i = j = 0; i < len; ) {
1389 if (s[i] == ch) {
1390 if (maxcount-- <= 0)
1391 break;
1392 SPLIT_APPEND(s, j, i);
1393 i = j = i + 1;
1394 } else
1395 i++;
1396 }
1397 if (j <= len) {
1398 SPLIT_APPEND(s, j, len);
1399 }
1400 return list;
1401
1402 onError:
1403 Py_DECREF(list);
1404 return NULL;
1405}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001407PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408"S.split([sep [,maxsplit]]) -> list of strings\n\
1409\n\
1410Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001411delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001412splits are done. If sep is not specified or is None, any\n\
1413whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001414
1415static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001416string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001417{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001418 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1419 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001420 Py_ssize_t maxsplit = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001421 const char *s = PyString_AS_STRING(self), *sub;
1422 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423
Martin v. Löwis9c830762006-04-13 08:37:17 +00001424 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001426 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001427 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001428 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001429 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001430 if (PyString_Check(subobj)) {
1431 sub = PyString_AS_STRING(subobj);
1432 n = PyString_GET_SIZE(subobj);
1433 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001434#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001435 else if (PyUnicode_Check(subobj))
1436 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001437#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001438 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1439 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001440
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441 if (n == 0) {
1442 PyErr_SetString(PyExc_ValueError, "empty separator");
1443 return NULL;
1444 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001445 else if (n == 1)
1446 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447
1448 list = PyList_New(0);
1449 if (list == NULL)
1450 return NULL;
1451
1452 i = j = 0;
1453 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001454 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455 if (maxsplit-- <= 0)
1456 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001457 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458 if (item == NULL)
1459 goto fail;
1460 err = PyList_Append(list, item);
1461 Py_DECREF(item);
1462 if (err < 0)
1463 goto fail;
1464 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001465 }
1466 else
1467 i++;
1468 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001469 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470 if (item == NULL)
1471 goto fail;
1472 err = PyList_Append(list, item);
1473 Py_DECREF(item);
1474 if (err < 0)
1475 goto fail;
1476
1477 return list;
1478
1479 fail:
1480 Py_DECREF(list);
1481 return NULL;
1482}
1483
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001484static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001485rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001486{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001487 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001488 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001489 PyObject *list = PyList_New(0);
1490
1491 if (list == NULL)
1492 return NULL;
1493
1494 for (i = j = len - 1; i >= 0; ) {
1495 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1496 i--;
1497 j = i;
1498 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1499 i--;
1500 if (j > i) {
1501 if (maxsplit-- <= 0)
1502 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001503 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001504 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1505 i--;
1506 j = i;
1507 }
1508 }
1509 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001511 }
1512 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001513 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001514 Py_DECREF(list);
1515 return NULL;
1516}
1517
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001518static PyObject *
Martin v. Löwis83687c92006-04-13 08:52:56 +00001519rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001520{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001521 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001522 PyObject *str;
1523 PyObject *list = PyList_New(0);
1524
1525 if (list == NULL)
1526 return NULL;
1527
1528 for (i = j = len - 1; i >= 0; ) {
1529 if (s[i] == ch) {
1530 if (maxcount-- <= 0)
1531 break;
1532 SPLIT_INSERT(s, i + 1, j + 1);
1533 j = i = i - 1;
1534 } else
1535 i--;
1536 }
1537 if (j >= -1) {
1538 SPLIT_INSERT(s, 0, j + 1);
1539 }
1540 return list;
1541
1542 onError:
1543 Py_DECREF(list);
1544 return NULL;
1545}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001546
1547PyDoc_STRVAR(rsplit__doc__,
1548"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1549\n\
1550Return a list of the words in the string S, using sep as the\n\
1551delimiter string, starting at the end of the string and working\n\
1552to the front. If maxsplit is given, at most maxsplit splits are\n\
1553done. If sep is not specified or is None, any whitespace string\n\
1554is a separator.");
1555
1556static PyObject *
1557string_rsplit(PyStringObject *self, PyObject *args)
1558{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001559 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1560 int err;
Martin v. Löwis9c830762006-04-13 08:37:17 +00001561 Py_ssize_t maxsplit = -1;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001562 const char *s = PyString_AS_STRING(self), *sub;
1563 PyObject *list, *item, *subobj = Py_None;
1564
Martin v. Löwis9c830762006-04-13 08:37:17 +00001565 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001566 return NULL;
1567 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001568 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001569 if (subobj == Py_None)
1570 return rsplit_whitespace(s, len, maxsplit);
1571 if (PyString_Check(subobj)) {
1572 sub = PyString_AS_STRING(subobj);
1573 n = PyString_GET_SIZE(subobj);
1574 }
1575#ifdef Py_USING_UNICODE
1576 else if (PyUnicode_Check(subobj))
1577 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1578#endif
1579 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1580 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001581
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001582 if (n == 0) {
1583 PyErr_SetString(PyExc_ValueError, "empty separator");
1584 return NULL;
1585 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001586 else if (n == 1)
1587 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001588
1589 list = PyList_New(0);
1590 if (list == NULL)
1591 return NULL;
1592
1593 j = len;
1594 i = j - n;
1595 while (i >= 0) {
1596 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1597 if (maxsplit-- <= 0)
1598 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001599 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001600 if (item == NULL)
1601 goto fail;
1602 err = PyList_Insert(list, 0, item);
1603 Py_DECREF(item);
1604 if (err < 0)
1605 goto fail;
1606 j = i;
1607 i -= n;
1608 }
1609 else
1610 i--;
1611 }
1612 item = PyString_FromStringAndSize(s, j);
1613 if (item == NULL)
1614 goto fail;
1615 err = PyList_Insert(list, 0, item);
1616 Py_DECREF(item);
1617 if (err < 0)
1618 goto fail;
1619
1620 return list;
1621
1622 fail:
1623 Py_DECREF(list);
1624 return NULL;
1625}
1626
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001628PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629"S.join(sequence) -> string\n\
1630\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001632sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001633
1634static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001635string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636{
1637 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001638 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001641 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001642 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001643 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001644 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645
Tim Peters19fe14e2001-01-19 03:03:47 +00001646 seq = PySequence_Fast(orig, "");
1647 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001648 return NULL;
1649 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001650
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001651 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001652 if (seqlen == 0) {
1653 Py_DECREF(seq);
1654 return PyString_FromString("");
1655 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001657 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001658 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1659 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001660 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001661 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001663 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001664
Raymond Hettinger674f2412004-08-23 23:23:54 +00001665 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001666 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001667 * Do a pre-pass to figure out the total amount of space we'll
1668 * need (sz), see whether any argument is absurd, and defer to
1669 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001670 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001671 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001672 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001673 item = PySequence_Fast_GET_ITEM(seq, i);
1674 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001675#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001676 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001677 /* Defer to Unicode join.
1678 * CAUTION: There's no gurantee that the
1679 * original sequence can be iterated over
1680 * again, so we must pass seq here.
1681 */
1682 PyObject *result;
1683 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001684 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001685 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001686 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001687#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001688 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001689 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001690 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001691 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001692 Py_DECREF(seq);
1693 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001694 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001695 sz += PyString_GET_SIZE(item);
1696 if (i != 0)
1697 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001698 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001699 PyErr_SetString(PyExc_OverflowError,
1700 "join() is too long for a Python string");
1701 Py_DECREF(seq);
1702 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001703 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001704 }
1705
1706 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001707 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001708 if (res == NULL) {
1709 Py_DECREF(seq);
1710 return NULL;
1711 }
1712
1713 /* Catenate everything. */
1714 p = PyString_AS_STRING(res);
1715 for (i = 0; i < seqlen; ++i) {
1716 size_t n;
1717 item = PySequence_Fast_GET_ITEM(seq, i);
1718 n = PyString_GET_SIZE(item);
1719 memcpy(p, PyString_AS_STRING(item), n);
1720 p += n;
1721 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001722 memcpy(p, sep, seplen);
1723 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001724 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001726
Jeremy Hylton49048292000-07-11 03:28:17 +00001727 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001728 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729}
1730
Tim Peters52e155e2001-06-16 05:42:57 +00001731PyObject *
1732_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001733{
Tim Petersa7259592001-06-16 05:11:17 +00001734 assert(sep != NULL && PyString_Check(sep));
1735 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001736 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001737}
1738
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001739static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001740string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001741{
1742 if (*end > len)
1743 *end = len;
1744 else if (*end < 0)
1745 *end += len;
1746 if (*end < 0)
1747 *end = 0;
1748 if (*start < 0)
1749 *start += len;
1750 if (*start < 0)
1751 *start = 0;
1752}
1753
Martin v. Löwis18e16552006-02-15 17:27:45 +00001754static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001755string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001757 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001758 Py_ssize_t len = PyString_GET_SIZE(self);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001759 Py_ssize_t n, i = 0, last = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761
Martin v. Löwis18e16552006-02-15 17:27:45 +00001762 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001763 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001764 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001765 return -2;
1766 if (PyString_Check(subobj)) {
1767 sub = PyString_AS_STRING(subobj);
1768 n = PyString_GET_SIZE(subobj);
1769 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001770#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001771 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001772 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001773#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001774 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001775 return -2;
1776
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001777 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778
Guido van Rossum4c08d552000-03-10 22:55:18 +00001779 if (dir > 0) {
1780 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001782 last -= n;
1783 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001784 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001785 return (long)i;
1786 }
1787 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001788 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001789
Guido van Rossum4c08d552000-03-10 22:55:18 +00001790 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001791 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001792 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001793 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001794 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001795 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001796
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797 return -1;
1798}
1799
1800
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001801PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802"S.find(sub [,start [,end]]) -> int\n\
1803\n\
1804Return the lowest index in S where substring sub is found,\n\
1805such that sub is contained within s[start,end]. Optional\n\
1806arguments start and end are interpreted as in slice notation.\n\
1807\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001808Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809
1810static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001811string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001813 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814 if (result == -2)
1815 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001816 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817}
1818
1819
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001820PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001821"S.index(sub [,start [,end]]) -> int\n\
1822\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001823Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824
1825static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001826string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001828 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829 if (result == -2)
1830 return NULL;
1831 if (result == -1) {
1832 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001833 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001834 return NULL;
1835 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001836 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001837}
1838
1839
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001840PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841"S.rfind(sub [,start [,end]]) -> int\n\
1842\n\
1843Return the highest index in S where substring sub is found,\n\
1844such that sub is contained within s[start,end]. Optional\n\
1845arguments start and end are interpreted as in slice notation.\n\
1846\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001847Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001848
1849static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001850string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001852 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853 if (result == -2)
1854 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001855 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856}
1857
1858
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001859PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001860"S.rindex(sub [,start [,end]]) -> int\n\
1861\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001862Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863
1864static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001865string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001867 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868 if (result == -2)
1869 return NULL;
1870 if (result == -1) {
1871 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001872 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001873 return NULL;
1874 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001875 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001876}
1877
1878
1879static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001880do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1881{
1882 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001883 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001884 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001885 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1886 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001887
1888 i = 0;
1889 if (striptype != RIGHTSTRIP) {
1890 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1891 i++;
1892 }
1893 }
1894
1895 j = len;
1896 if (striptype != LEFTSTRIP) {
1897 do {
1898 j--;
1899 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1900 j++;
1901 }
1902
1903 if (i == 0 && j == len && PyString_CheckExact(self)) {
1904 Py_INCREF(self);
1905 return (PyObject*)self;
1906 }
1907 else
1908 return PyString_FromStringAndSize(s+i, j-i);
1909}
1910
1911
1912static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001913do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914{
1915 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001916 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918 i = 0;
1919 if (striptype != RIGHTSTRIP) {
1920 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1921 i++;
1922 }
1923 }
1924
1925 j = len;
1926 if (striptype != LEFTSTRIP) {
1927 do {
1928 j--;
1929 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1930 j++;
1931 }
1932
Tim Peters8fa5dd02001-09-12 02:18:30 +00001933 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001934 Py_INCREF(self);
1935 return (PyObject*)self;
1936 }
1937 else
1938 return PyString_FromStringAndSize(s+i, j-i);
1939}
1940
1941
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001942static PyObject *
1943do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1944{
1945 PyObject *sep = NULL;
1946
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001947 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001948 return NULL;
1949
1950 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001951 if (PyString_Check(sep))
1952 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001953#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001954 else if (PyUnicode_Check(sep)) {
1955 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1956 PyObject *res;
1957 if (uniself==NULL)
1958 return NULL;
1959 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1960 striptype, sep);
1961 Py_DECREF(uniself);
1962 return res;
1963 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001964#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00001965 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001966#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00001967 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001968#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00001969 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001970#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00001971 STRIPNAME(striptype));
1972 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001973 }
1974
1975 return do_strip(self, striptype);
1976}
1977
1978
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001979PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001980"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001981\n\
1982Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001983whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001984If chars is given and not None, remove characters in chars instead.\n\
1985If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986
1987static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001988string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001989{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001990 if (PyTuple_GET_SIZE(args) == 0)
1991 return do_strip(self, BOTHSTRIP); /* Common case */
1992 else
1993 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994}
1995
1996
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001997PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001998"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002000Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002001If chars is given and not None, remove characters in chars instead.\n\
2002If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002003
2004static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002005string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002007 if (PyTuple_GET_SIZE(args) == 0)
2008 return do_strip(self, LEFTSTRIP); /* Common case */
2009 else
2010 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002011}
2012
2013
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002014PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002015"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002017Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002018If chars is given and not None, remove characters in chars instead.\n\
2019If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020
2021static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002022string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002023{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002024 if (PyTuple_GET_SIZE(args) == 0)
2025 return do_strip(self, RIGHTSTRIP); /* Common case */
2026 else
2027 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028}
2029
2030
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002031PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002032"S.lower() -> string\n\
2033\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002034Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002035
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002036/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2037#ifndef _tolower
2038#define _tolower tolower
2039#endif
2040
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002041static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002042string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002044 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002045 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002046 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002047
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002048 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002049 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002050 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002051
2052 s = PyString_AS_STRING(newobj);
2053
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002054 memcpy(s, PyString_AS_STRING(self), n);
2055
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002056 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002057 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002058 if (isupper(c))
2059 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002061
Anthony Baxtera6286212006-04-11 07:42:36 +00002062 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002063}
2064
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002065PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066"S.upper() -> string\n\
2067\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002068Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002070#ifndef _toupper
2071#define _toupper toupper
2072#endif
2073
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002074static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002075string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002076{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002077 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002078 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002079 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002081 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002082 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002084
2085 s = PyString_AS_STRING(newobj);
2086
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002087 memcpy(s, PyString_AS_STRING(self), n);
2088
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002089 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002090 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002091 if (islower(c))
2092 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002094
Anthony Baxtera6286212006-04-11 07:42:36 +00002095 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096}
2097
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002098PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099"S.title() -> string\n\
2100\n\
2101Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002102characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002103
2104static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002105string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002106{
2107 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002108 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002110 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111
Anthony Baxtera6286212006-04-11 07:42:36 +00002112 newobj = PyString_FromStringAndSize(NULL, n);
2113 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002114 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002115 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002116 for (i = 0; i < n; i++) {
2117 int c = Py_CHARMASK(*s++);
2118 if (islower(c)) {
2119 if (!previous_is_cased)
2120 c = toupper(c);
2121 previous_is_cased = 1;
2122 } else if (isupper(c)) {
2123 if (previous_is_cased)
2124 c = tolower(c);
2125 previous_is_cased = 1;
2126 } else
2127 previous_is_cased = 0;
2128 *s_new++ = c;
2129 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002130 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002131}
2132
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002133PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134"S.capitalize() -> string\n\
2135\n\
2136Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002137capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138
2139static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002140string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141{
2142 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002143 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002144 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145
Anthony Baxtera6286212006-04-11 07:42:36 +00002146 newobj = PyString_FromStringAndSize(NULL, n);
2147 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002149 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002150 if (0 < n) {
2151 int c = Py_CHARMASK(*s++);
2152 if (islower(c))
2153 *s_new = toupper(c);
2154 else
2155 *s_new = c;
2156 s_new++;
2157 }
2158 for (i = 1; i < n; i++) {
2159 int c = Py_CHARMASK(*s++);
2160 if (isupper(c))
2161 *s_new = tolower(c);
2162 else
2163 *s_new = c;
2164 s_new++;
2165 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002166 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167}
2168
2169
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002170PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171"S.count(sub[, start[, end]]) -> int\n\
2172\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002173Return the number of non-overlapping occurrences of substring sub in\n\
2174string S[start:end]. Optional arguments start and end are interpreted\n\
2175as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176
2177static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002178string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002180 const char *s = PyString_AS_STRING(self), *sub, *t;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002181 Py_ssize_t len = PyString_GET_SIZE(self), n;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00002182 Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002183 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002184 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185
Guido van Rossumc6821402000-05-08 14:08:05 +00002186 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2187 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002189
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190 if (PyString_Check(subobj)) {
2191 sub = PyString_AS_STRING(subobj);
2192 n = PyString_GET_SIZE(subobj);
2193 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002194#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002195 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002196 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002197 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2198 if (count == -1)
2199 return NULL;
2200 else
2201 return PyInt_FromLong((long) count);
2202 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002203#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2205 return NULL;
2206
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002207 string_adjust_indices(&i, &last, len);
2208
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209 m = last + 1 - n;
2210 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002211 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002212
2213 r = 0;
2214 while (i < m) {
2215 if (!memcmp(s+i, sub, n)) {
2216 r++;
2217 i += n;
2218 } else {
2219 i++;
2220 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002221 if (i >= m)
2222 break;
Anthony Baxtera6286212006-04-11 07:42:36 +00002223 t = (const char *)memchr(s+i, sub[0], m-i);
Raymond Hettinger57e74472005-02-20 09:54:53 +00002224 if (t == NULL)
2225 break;
2226 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002228 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229}
2230
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002231PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232"S.swapcase() -> string\n\
2233\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002234Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002235converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236
2237static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002238string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239{
2240 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002241 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002242 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002243
Anthony Baxtera6286212006-04-11 07:42:36 +00002244 newobj = PyString_FromStringAndSize(NULL, n);
2245 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002247 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002248 for (i = 0; i < n; i++) {
2249 int c = Py_CHARMASK(*s++);
2250 if (islower(c)) {
2251 *s_new = toupper(c);
2252 }
2253 else if (isupper(c)) {
2254 *s_new = tolower(c);
2255 }
2256 else
2257 *s_new = c;
2258 s_new++;
2259 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002260 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261}
2262
2263
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002264PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265"S.translate(table [,deletechars]) -> string\n\
2266\n\
2267Return a copy of the string S, where all characters occurring\n\
2268in the optional argument deletechars are removed, and the\n\
2269remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002270translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002271
2272static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002273string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002274{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002275 register char *input, *output;
2276 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002277 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002279 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002280 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002281 PyObject *result;
2282 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002284
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002285 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002288
2289 if (PyString_Check(tableobj)) {
2290 table1 = PyString_AS_STRING(tableobj);
2291 tablen = PyString_GET_SIZE(tableobj);
2292 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002293#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002295 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296 parameter; instead a mapping to None will cause characters
2297 to be deleted. */
2298 if (delobj != NULL) {
2299 PyErr_SetString(PyExc_TypeError,
2300 "deletions are implemented differently for unicode");
2301 return NULL;
2302 }
2303 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2304 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002305#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308
Martin v. Löwis00b61272002-12-12 20:03:19 +00002309 if (tablen != 256) {
2310 PyErr_SetString(PyExc_ValueError,
2311 "translation table must be 256 characters long");
2312 return NULL;
2313 }
2314
Guido van Rossum4c08d552000-03-10 22:55:18 +00002315 if (delobj != NULL) {
2316 if (PyString_Check(delobj)) {
2317 del_table = PyString_AS_STRING(delobj);
2318 dellen = PyString_GET_SIZE(delobj);
2319 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002320#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002321 else if (PyUnicode_Check(delobj)) {
2322 PyErr_SetString(PyExc_TypeError,
2323 "deletions are implemented differently for unicode");
2324 return NULL;
2325 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002326#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2328 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002329 }
2330 else {
2331 del_table = NULL;
2332 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333 }
2334
2335 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002336 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337 result = PyString_FromStringAndSize((char *)NULL, inlen);
2338 if (result == NULL)
2339 return NULL;
2340 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002341 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342
2343 if (dellen == 0) {
2344 /* If no deletions are required, use faster code */
2345 for (i = inlen; --i >= 0; ) {
2346 c = Py_CHARMASK(*input++);
2347 if (Py_CHARMASK((*output++ = table[c])) != c)
2348 changed = 1;
2349 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002350 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351 return result;
2352 Py_DECREF(result);
2353 Py_INCREF(input_obj);
2354 return input_obj;
2355 }
2356
2357 for (i = 0; i < 256; i++)
2358 trans_table[i] = Py_CHARMASK(table[i]);
2359
2360 for (i = 0; i < dellen; i++)
2361 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2362
2363 for (i = inlen; --i >= 0; ) {
2364 c = Py_CHARMASK(*input++);
2365 if (trans_table[c] != -1)
2366 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2367 continue;
2368 changed = 1;
2369 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002370 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002371 Py_DECREF(result);
2372 Py_INCREF(input_obj);
2373 return input_obj;
2374 }
2375 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002376 if (inlen > 0)
2377 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378 return result;
2379}
2380
2381
/* Search directions accepted by findstring() and countstring(). */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* True when `length` bytes of `pattern` occur in `target` at `offset`.
 * The first and last bytes are compared before falling back to memcmp()
 * on the interior.
 *
 * Don't call if length < 2: `length - 2` would then be negative and be
 * converted to a huge size_t count for memcmp().
 *
 * All arguments are parenthesized so that expressions such as `buf + 1`
 * can be passed safely; note that they are still evaluated more than
 * once.
 */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )

/* Pointer to the first occurrence of byte `c` within the first
 * `target_len` bytes of `target`, or NULL when there is none. */
#define findchar(target, target_len, c)                             \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2395
2396/* String ops must return a string. */
2397/* If the object is subclass of string, create a copy */
2398static PyStringObject *
2399return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002400{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002401 if (PyString_CheckExact(self)) {
2402 Py_INCREF(self);
2403 return self;
2404 }
2405 return (PyStringObject *)PyString_FromStringAndSize(
2406 PyString_AS_STRING(self),
2407 PyString_GET_SIZE(self));
2408}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002410static Py_ssize_t
2411countchar(char *target, int target_len, char c)
2412{
2413 Py_ssize_t count=0;
2414 char *start=target;
2415 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002417 while ( (start=findchar(start, end-start, c)) != NULL ) {
2418 count++;
2419 start += 1;
2420 }
2421
2422 return count;
2423}
2424
2425static Py_ssize_t
2426findstring(char *target, Py_ssize_t target_len,
2427 char *pattern, Py_ssize_t pattern_len,
2428 Py_ssize_t start,
2429 Py_ssize_t end,
2430 int direction)
2431{
2432 if (start < 0) {
2433 start += target_len;
2434 if (start < 0)
2435 start = 0;
2436 }
2437 if (end > target_len) {
2438 end = target_len;
2439 } else if (end < 0) {
2440 end += target_len;
2441 if (end < 0)
2442 end = 0;
2443 }
2444
2445 /* zero-length substrings always match at the first attempt */
2446 if (pattern_len == 0)
2447 return (direction > 0) ? start : end;
2448
2449 end -= pattern_len;
2450
2451 if (direction < 0) {
2452 for (; end >= start; end--)
2453 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2454 return end;
2455 } else {
2456 for (; start <= end; start++)
2457 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2458 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002459 }
2460 return -1;
2461}
2462
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002463Py_ssize_t
2464countstring(char *target, Py_ssize_t target_len,
2465 char *pattern, Py_ssize_t pattern_len,
2466 Py_ssize_t start,
2467 Py_ssize_t end,
2468 int direction)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002470 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002471
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002472 if (start < 0) {
2473 start += target_len;
2474 if (start < 0)
2475 start = 0;
2476 }
2477 if (end > target_len) {
2478 end = target_len;
2479 } else if (end < 0) {
2480 end += target_len;
2481 if (end < 0)
2482 end = 0;
2483 }
2484
2485 /* zero-length substrings match everywhere */
2486 if (pattern_len == 0)
2487 return target_len+1;
2488
2489 end -= pattern_len;
2490
2491 if (direction < 0) {
2492 for (; end >= start; end--)
2493 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2494 count++;
2495 end -= pattern_len-1;
2496 }
2497 } else {
2498 for (; start <= end; start++)
2499 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2500 count++;
2501 start += pattern_len-1;
2502 }
2503 }
2504 return count;
2505}
2506
2507
/* Algorithms for different cases of string replacement */
2509
2510/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2511static PyStringObject *
2512replace_interleave(PyStringObject *self,
2513 PyStringObject *to,
2514 Py_ssize_t maxcount)
2515{
2516 char *self_s, *to_s, *result_s;
2517 Py_ssize_t self_len, to_len, result_len;
2518 Py_ssize_t count, i, product;
2519 PyStringObject *result;
2520
2521 self_len = PyString_GET_SIZE(self);
2522 to_len = PyString_GET_SIZE(to);
2523
2524 /* 1 at the end plus 1 after every character */
2525 count = self_len+1;
2526 if (maxcount < count)
2527 count = maxcount;
2528
2529 /* Check for overflow */
2530 /* result_len = count * to_len + self_len; */
2531 product = count * to_len;
2532 if (product / to_len != count) {
2533 PyErr_SetString(PyExc_OverflowError,
2534 "replace string is too long");
2535 return NULL;
2536 }
2537 result_len = product + self_len;
2538 if (result_len < 0) {
2539 PyErr_SetString(PyExc_OverflowError,
2540 "replace string is too long");
2541 return NULL;
2542 }
2543
2544 if (! (result = (PyStringObject *)
2545 PyString_FromStringAndSize(NULL, result_len)) )
2546 return NULL;
2547
2548 self_s = PyString_AS_STRING(self);
2549 to_s = PyString_AS_STRING(to);
2550 to_len = PyString_GET_SIZE(to);
2551 result_s = PyString_AS_STRING(result);
2552
2553 /* TODO: special case single character, which doesn't need memcpy */
2554
2555 /* Lay the first one down (guaranteed this will occur) */
2556 memcpy(result_s, to_s, to_len);
2557 result_s += to_len;
2558 count -= 1;
2559
2560 for (i=0; i<count; i++) {
2561 *result_s++ = *self_s++;
2562 memcpy(result_s, to_s, to_len);
2563 result_s += to_len;
2564 }
2565
2566 /* Copy the rest of the original string */
2567 memcpy(result_s, self_s, self_len-i);
2568
2569 return result;
2570}
2571
2572/* Special case for deleting a single character */
2573/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2574static PyStringObject *
2575replace_delete_single_character(PyStringObject *self,
2576 char from_c, Py_ssize_t maxcount)
2577{
2578 char *self_s, *result_s;
2579 char *start, *next, *end;
2580 Py_ssize_t self_len, result_len;
2581 Py_ssize_t count;
2582 PyStringObject *result;
2583
2584 self_len = PyString_GET_SIZE(self);
2585 self_s = PyString_AS_STRING(self);
2586
2587 count = countchar(self_s, self_len, from_c);
2588 if (count == 0) {
2589 return return_self(self);
2590 }
2591 if (count > maxcount)
2592 count = maxcount;
2593
2594 result_len = self_len - count; /* from_len == 1 */
2595 assert(result_len>=0);
2596
2597 if ( (result = (PyStringObject *)
2598 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2599 return NULL;
2600 result_s = PyString_AS_STRING(result);
2601
2602 start = self_s;
2603 end = self_s + self_len;
2604 while (count-- > 0) {
2605 next = findchar(start, end-start, from_c);
2606 if (next == NULL)
2607 break;
2608 memcpy(result_s, start, next-start);
2609 result_s += (next-start);
2610 start = next+1;
2611 }
2612 memcpy(result_s, start, end-start);
2613
2614 return result;
2615}
2616
2617/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2618
2619static PyStringObject *
2620replace_delete_substring(PyStringObject *self, PyStringObject *from,
2621 Py_ssize_t maxcount) {
2622 char *self_s, *from_s, *result_s;
2623 char *start, *next, *end;
2624 Py_ssize_t self_len, from_len, result_len;
2625 Py_ssize_t count, offset;
2626 PyStringObject *result;
2627
2628 self_len = PyString_GET_SIZE(self);
2629 self_s = PyString_AS_STRING(self);
2630 from_len = PyString_GET_SIZE(from);
2631 from_s = PyString_AS_STRING(from);
2632
2633 count = countstring(self_s, self_len,
2634 from_s, from_len,
2635 0, self_len, 1);
2636
2637 if (count > maxcount)
2638 count = maxcount;
2639
2640 if (count == 0) {
2641 /* no matches */
2642 return return_self(self);
2643 }
2644
2645 result_len = self_len - (count * from_len);
2646 assert (result_len>=0);
2647
2648 if ( (result = (PyStringObject *)
2649 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2650 return NULL;
2651
2652 result_s = PyString_AS_STRING(result);
2653
2654 start = self_s;
2655 end = self_s + self_len;
2656 while (count-- > 0) {
2657 offset = findstring(start, end-start,
2658 from_s, from_len,
2659 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002660 if (offset == -1)
2661 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002662 next = start + offset;
2663
2664 memcpy(result_s, start, next-start);
2665
2666 result_s += (next-start);
2667 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002668 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002669 memcpy(result_s, start, end-start);
2670 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002671}
2672
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002673/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2674static PyStringObject *
2675replace_single_character_in_place(PyStringObject *self,
2676 char from_c, char to_c,
2677 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002678{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002679 char *self_s, *result_s, *start, *end, *next;
2680 Py_ssize_t self_len;
2681 PyStringObject *result;
2682
2683 /* The result string will be the same size */
2684 self_s = PyString_AS_STRING(self);
2685 self_len = PyString_GET_SIZE(self);
2686
2687 next = findchar(self_s, self_len, from_c);
2688
2689 if (next == NULL) {
2690 /* No matches; return the original string */
2691 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002692 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002693
2694 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002695 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002696 if (result == NULL)
2697 return NULL;
2698 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002699 memcpy(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002700
2701 /* change everything in-place, starting with this one */
2702 start = result_s + (next-self_s);
2703 *start = to_c;
2704 start++;
2705 end = result_s + self_len;
2706
2707 while (--maxcount > 0) {
2708 next = findchar(start, end-start, from_c);
2709 if (next == NULL)
2710 break;
2711 *next = to_c;
2712 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002713 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002714
2715 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002716}
2717
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002718/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2719static PyStringObject *
2720replace_substring_in_place(PyStringObject *self,
2721 PyStringObject *from,
2722 PyStringObject *to,
2723 Py_ssize_t maxcount)
2724{
2725 char *result_s, *start, *end;
2726 char *self_s, *from_s, *to_s;
2727 Py_ssize_t self_len, from_len, offset;
2728 PyStringObject *result;
2729
2730 /* The result string will be the same size */
2731
2732 self_s = PyString_AS_STRING(self);
2733 self_len = PyString_GET_SIZE(self);
2734
2735 from_s = PyString_AS_STRING(from);
2736 from_len = PyString_GET_SIZE(from);
2737 to_s = PyString_AS_STRING(to);
2738
2739 offset = findstring(self_s, self_len,
2740 from_s, from_len,
2741 0, self_len, FORWARD);
2742
2743 if (offset == -1) {
2744 /* No matches; return the original string */
2745 return return_self(self);
2746 }
2747
2748 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002749 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002750 if (result == NULL)
2751 return NULL;
2752 result_s = PyString_AS_STRING(result);
Andrew Dalke8c909102006-05-25 17:53:00 +00002753 memcpy(result_s, self_s, self_len);
2754
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002755
2756 /* change everything in-place, starting with this one */
2757 start = result_s + offset;
2758 memcpy(start, to_s, from_len);
2759 start += from_len;
2760 end = result_s + self_len;
2761
2762 while ( --maxcount > 0) {
2763 offset = findstring(start, end-start,
2764 from_s, from_len,
2765 0, end-start, FORWARD);
2766 if (offset==-1)
2767 break;
2768 memcpy(start+offset, to_s, from_len);
2769 start += offset+from_len;
2770 }
2771
2772 return result;
2773}
2774
2775/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2776static PyStringObject *
2777replace_single_character(PyStringObject *self,
2778 char from_c,
2779 PyStringObject *to,
2780 Py_ssize_t maxcount)
2781{
2782 char *self_s, *to_s, *result_s;
2783 char *start, *next, *end;
2784 Py_ssize_t self_len, to_len, result_len;
2785 Py_ssize_t count, product;
2786 PyStringObject *result;
2787
2788 self_s = PyString_AS_STRING(self);
2789 self_len = PyString_GET_SIZE(self);
2790
2791 count = countchar(self_s, self_len, from_c);
2792 if (count > maxcount)
2793 count = maxcount;
2794
2795 if (count == 0) {
2796 /* no matches, return unchanged */
2797 return return_self(self);
2798 }
2799
2800 to_s = PyString_AS_STRING(to);
2801 to_len = PyString_GET_SIZE(to);
2802
2803 /* use the difference between current and new, hence the "-1" */
2804 /* result_len = self_len + count * (to_len-1) */
2805 product = count * (to_len-1);
2806 if (product / (to_len-1) != count) {
2807 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2808 return NULL;
2809 }
2810 result_len = self_len + product;
2811 if (result_len < 0) {
2812 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2813 return NULL;
2814 }
2815
2816 if ( (result = (PyStringObject *)
2817 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2818 return NULL;
2819 result_s = PyString_AS_STRING(result);
2820
2821 start = self_s;
2822 end = self_s + self_len;
2823 while (count-- > 0) {
2824 next = findchar(start, end-start, from_c);
2825 if (next == NULL)
2826 break;
2827
2828 if (next == start) {
2829 /* replace with the 'to' */
2830 memcpy(result_s, to_s, to_len);
2831 result_s += to_len;
2832 start += 1;
2833 } else {
2834 /* copy the unchanged old then the 'to' */
2835 memcpy(result_s, start, next-start);
2836 result_s += (next-start);
2837 memcpy(result_s, to_s, to_len);
2838 result_s += to_len;
2839 start = next+1;
2840 }
2841 }
2842 /* Copy the remainder of the remaining string */
2843 memcpy(result_s, start, end-start);
2844
2845 return result;
2846}
2847
2848/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2849static PyStringObject *
2850replace_substring(PyStringObject *self,
2851 PyStringObject *from,
2852 PyStringObject *to,
2853 Py_ssize_t maxcount) {
2854 char *self_s, *from_s, *to_s, *result_s;
2855 char *start, *next, *end;
2856 Py_ssize_t self_len, from_len, to_len, result_len;
2857 Py_ssize_t count, offset, product;
2858 PyStringObject *result;
2859
2860 self_s = PyString_AS_STRING(self);
2861 self_len = PyString_GET_SIZE(self);
2862 from_s = PyString_AS_STRING(from);
2863 from_len = PyString_GET_SIZE(from);
2864
2865 count = countstring(self_s, self_len,
2866 from_s, from_len,
2867 0, self_len, FORWARD);
2868 if (count > maxcount)
2869 count = maxcount;
2870
2871 if (count == 0) {
2872 /* no matches, return unchanged */
2873 return return_self(self);
2874 }
2875
2876 to_s = PyString_AS_STRING(to);
2877 to_len = PyString_GET_SIZE(to);
2878
2879 /* Check for overflow */
2880 /* result_len = self_len + count * (to_len-from_len) */
2881 product = count * (to_len-from_len);
2882 if (product / (to_len-from_len) != count) {
2883 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2884 return NULL;
2885 }
2886 result_len = self_len + product;
2887 if (result_len < 0) {
2888 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2889 return NULL;
2890 }
2891
2892 if ( (result = (PyStringObject *)
2893 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2894 return NULL;
2895 result_s = PyString_AS_STRING(result);
2896
2897 start = self_s;
2898 end = self_s + self_len;
2899 while (count-- > 0) {
2900 offset = findstring(start, end-start,
2901 from_s, from_len,
2902 0, end-start, FORWARD);
2903 if (offset == -1)
2904 break;
2905 next = start+offset;
2906 if (next == start) {
2907 /* replace with the 'to' */
2908 memcpy(result_s, to_s, to_len);
2909 result_s += to_len;
2910 start += from_len;
2911 } else {
2912 /* copy the unchanged old then the 'to' */
2913 memcpy(result_s, start, next-start);
2914 result_s += (next-start);
2915 memcpy(result_s, to_s, to_len);
2916 result_s += to_len;
2917 start = next+from_len;
2918 }
2919 }
2920 /* Copy the remainder of the remaining string */
2921 memcpy(result_s, start, end-start);
2922
2923 return result;
2924}
2925
2926
2927static PyStringObject *
2928replace(PyStringObject *self,
2929 PyStringObject *from,
2930 PyStringObject *to,
2931 Py_ssize_t maxcount)
2932{
2933 Py_ssize_t from_len, to_len;
2934
2935 if (maxcount < 0) {
2936 maxcount = PY_SSIZE_T_MAX;
2937 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2938 /* nothing to do; return the original string */
2939 return return_self(self);
2940 }
2941
2942 from_len = PyString_GET_SIZE(from);
2943 to_len = PyString_GET_SIZE(to);
2944
2945 if (maxcount == 0 ||
2946 (from_len == 0 && to_len == 0)) {
2947 /* nothing to do; return the original string */
2948 return return_self(self);
2949 }
2950
2951 /* Handle zero-length special cases */
2952
2953 if (from_len == 0) {
2954 /* insert the 'to' string everywhere. */
2955 /* >>> "Python".replace("", ".") */
2956 /* '.P.y.t.h.o.n.' */
2957 return replace_interleave(self, to, maxcount);
2958 }
2959
2960 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2961 /* point for an empty self string to generate a non-empty string */
2962 /* Special case so the remaining code always gets a non-empty string */
2963 if (PyString_GET_SIZE(self) == 0) {
2964 return return_self(self);
2965 }
2966
2967 if (to_len == 0) {
2968 /* delete all occurances of 'from' string */
2969 if (from_len == 1) {
2970 return replace_delete_single_character(
2971 self, PyString_AS_STRING(from)[0], maxcount);
2972 } else {
2973 return replace_delete_substring(self, from, maxcount);
2974 }
2975 }
2976
2977 /* Handle special case where both strings have the same length */
2978
2979 if (from_len == to_len) {
2980 if (from_len == 1) {
2981 return replace_single_character_in_place(
2982 self,
2983 PyString_AS_STRING(from)[0],
2984 PyString_AS_STRING(to)[0],
2985 maxcount);
2986 } else {
2987 return replace_substring_in_place(
2988 self, from, to, maxcount);
2989 }
2990 }
2991
2992 /* Otherwise use the more generic algorithms */
2993 if (from_len == 1) {
2994 return replace_single_character(self, PyString_AS_STRING(from)[0],
2995 to, maxcount);
2996 } else {
2997 /* len('from')>=2, len('to')>=1 */
2998 return replace_substring(self, from, to, maxcount);
2999 }
3000}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003001
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003002PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003003"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003004\n\
3005Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003006old replaced by new. If the optional argument count is\n\
3007given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003008
3009static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003010string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003011{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003012 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003013 PyObject *from, *to;
3014 char *tmp_s;
3015 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003016
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003017 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003018 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003019
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003020 if (PyString_Check(from)) {
3021 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003022 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003023#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003024 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003025 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003026 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003027#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003028 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003029 return NULL;
3030
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003031 if (PyString_Check(to)) {
3032 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003033 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003034#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003035 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003036 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003037 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003038#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003039 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003040 return NULL;
3041
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003042 return (PyObject *)replace((PyStringObject *) self,
3043 (PyStringObject *) from,
3044 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003045}
3046
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003047/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003048
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003049PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003050"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003051\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003052Return True if S starts with the specified prefix, False otherwise.\n\
3053With optional start, test S beginning at that position.\n\
3054With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003055
3056static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003057string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003058{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003059 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003060 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003061 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003062 Py_ssize_t plen;
3063 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003064 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003065 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003066
Guido van Rossumc6821402000-05-08 14:08:05 +00003067 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3068 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003069 return NULL;
3070 if (PyString_Check(subobj)) {
3071 prefix = PyString_AS_STRING(subobj);
3072 plen = PyString_GET_SIZE(subobj);
3073 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003074#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003075 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003076 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003077 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003078 subobj, start, end, -1);
3079 if (rc == -1)
3080 return NULL;
3081 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003082 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003083 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003084#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003085 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003086 return NULL;
3087
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003088 string_adjust_indices(&start, &end, len);
3089
3090 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003091 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003092
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003093 if (end-start >= plen)
3094 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
3095 else
3096 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003097}
3098
3099
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003100PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003101"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003102\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003103Return True if S ends with the specified suffix, False otherwise.\n\
3104With optional start, test S beginning at that position.\n\
3105With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003106
3107static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003108string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003109{
Guido van Rossum4c08d552000-03-10 22:55:18 +00003110 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003111 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003112 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003113 Py_ssize_t slen;
3114 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003115 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003116 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003117
Guido van Rossumc6821402000-05-08 14:08:05 +00003118 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3119 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003120 return NULL;
3121 if (PyString_Check(subobj)) {
3122 suffix = PyString_AS_STRING(subobj);
3123 slen = PyString_GET_SIZE(subobj);
3124 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003125#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003126 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003127 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003128 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003129 subobj, start, end, +1);
3130 if (rc == -1)
3131 return NULL;
3132 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00003133 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00003134 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003135#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00003136 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003137 return NULL;
3138
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003139 string_adjust_indices(&start, &end, len);
3140
3141 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003142 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003143
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003144 if (end-slen > start)
3145 start = end - slen;
3146 if (end-start >= slen)
3147 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
3148 else
3149 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003150}
3151
3152
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003153PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003154"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003155\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003156Encodes S using the codec registered for encoding. encoding defaults\n\
3157to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003158handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003159a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3160'xmlcharrefreplace' as well as any other name registered with\n\
3161codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003162
3163static PyObject *
3164string_encode(PyStringObject *self, PyObject *args)
3165{
3166 char *encoding = NULL;
3167 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003168 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003169
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003170 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3171 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003172 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003173 if (v == NULL)
3174 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003175 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3176 PyErr_Format(PyExc_TypeError,
3177 "encoder did not return a string/unicode object "
3178 "(type=%.400s)",
3179 v->ob_type->tp_name);
3180 Py_DECREF(v);
3181 return NULL;
3182 }
3183 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003184
3185 onError:
3186 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003187}
3188
3189
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003190PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003191"S.decode([encoding[,errors]]) -> object\n\
3192\n\
3193Decodes S using the codec registered for encoding. encoding defaults\n\
3194to the default encoding. errors may be given to set a different error\n\
3195handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003196a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3197as well as any other name registerd with codecs.register_error that is\n\
3198able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003199
3200static PyObject *
3201string_decode(PyStringObject *self, PyObject *args)
3202{
3203 char *encoding = NULL;
3204 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003205 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003206
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003207 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3208 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003209 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003210 if (v == NULL)
3211 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003212 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3213 PyErr_Format(PyExc_TypeError,
3214 "decoder did not return a string/unicode object "
3215 "(type=%.400s)",
3216 v->ob_type->tp_name);
3217 Py_DECREF(v);
3218 return NULL;
3219 }
3220 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003221
3222 onError:
3223 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003224}
3225
3226
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003227PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003228"S.expandtabs([tabsize]) -> string\n\
3229\n\
3230Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003231If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003232
3233static PyObject*
3234string_expandtabs(PyStringObject *self, PyObject *args)
3235{
3236 const char *e, *p;
3237 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003238 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003239 PyObject *u;
3240 int tabsize = 8;
3241
3242 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3243 return NULL;
3244
Thomas Wouters7e474022000-07-16 12:04:32 +00003245 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003246 i = j = 0;
3247 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3248 for (p = PyString_AS_STRING(self); p < e; p++)
3249 if (*p == '\t') {
3250 if (tabsize > 0)
3251 j += tabsize - (j % tabsize);
3252 }
3253 else {
3254 j++;
3255 if (*p == '\n' || *p == '\r') {
3256 i += j;
3257 j = 0;
3258 }
3259 }
3260
3261 /* Second pass: create output string and fill it */
3262 u = PyString_FromStringAndSize(NULL, i + j);
3263 if (!u)
3264 return NULL;
3265
3266 j = 0;
3267 q = PyString_AS_STRING(u);
3268
3269 for (p = PyString_AS_STRING(self); p < e; p++)
3270 if (*p == '\t') {
3271 if (tabsize > 0) {
3272 i = tabsize - (j % tabsize);
3273 j += i;
3274 while (i--)
3275 *q++ = ' ';
3276 }
3277 }
3278 else {
3279 j++;
3280 *q++ = *p;
3281 if (*p == '\n' || *p == '\r')
3282 j = 0;
3283 }
3284
3285 return u;
3286}
3287
Tim Peters8fa5dd02001-09-12 02:18:30 +00003288static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00003289pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003290{
3291 PyObject *u;
3292
3293 if (left < 0)
3294 left = 0;
3295 if (right < 0)
3296 right = 0;
3297
Tim Peters8fa5dd02001-09-12 02:18:30 +00003298 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003299 Py_INCREF(self);
3300 return (PyObject *)self;
3301 }
3302
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003303 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003304 left + PyString_GET_SIZE(self) + right);
3305 if (u) {
3306 if (left)
3307 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003308 memcpy(PyString_AS_STRING(u) + left,
3309 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003310 PyString_GET_SIZE(self));
3311 if (right)
3312 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3313 fill, right);
3314 }
3315
3316 return u;
3317}
3318
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003319PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003320"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003321"\n"
3322"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003323"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003324
3325static PyObject *
3326string_ljust(PyStringObject *self, PyObject *args)
3327{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003328 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003329 char fillchar = ' ';
3330
Thomas Wouters4abb3662006-04-19 14:50:15 +00003331 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003332 return NULL;
3333
Tim Peters8fa5dd02001-09-12 02:18:30 +00003334 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003335 Py_INCREF(self);
3336 return (PyObject*) self;
3337 }
3338
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003339 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003340}
3341
3342
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003343PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003344"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003345"\n"
3346"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003347"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003348
3349static PyObject *
3350string_rjust(PyStringObject *self, PyObject *args)
3351{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003352 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003353 char fillchar = ' ';
3354
Thomas Wouters4abb3662006-04-19 14:50:15 +00003355 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003356 return NULL;
3357
Tim Peters8fa5dd02001-09-12 02:18:30 +00003358 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003359 Py_INCREF(self);
3360 return (PyObject*) self;
3361 }
3362
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003363 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003364}
3365
3366
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003367PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003368"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003369"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003370"Return S centered in a string of length width. Padding is\n"
3371"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003372
3373static PyObject *
3374string_center(PyStringObject *self, PyObject *args)
3375{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003376 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003377 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003378 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003379
Thomas Wouters4abb3662006-04-19 14:50:15 +00003380 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003381 return NULL;
3382
Tim Peters8fa5dd02001-09-12 02:18:30 +00003383 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003384 Py_INCREF(self);
3385 return (PyObject*) self;
3386 }
3387
3388 marg = width - PyString_GET_SIZE(self);
3389 left = marg / 2 + (marg & width & 1);
3390
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003391 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003392}
3393
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003394PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003395"S.zfill(width) -> string\n"
3396"\n"
3397"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003398"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003399
3400static PyObject *
3401string_zfill(PyStringObject *self, PyObject *args)
3402{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003403 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003404 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003405 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003406 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003407
Thomas Wouters4abb3662006-04-19 14:50:15 +00003408 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003409 return NULL;
3410
3411 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003412 if (PyString_CheckExact(self)) {
3413 Py_INCREF(self);
3414 return (PyObject*) self;
3415 }
3416 else
3417 return PyString_FromStringAndSize(
3418 PyString_AS_STRING(self),
3419 PyString_GET_SIZE(self)
3420 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003421 }
3422
3423 fill = width - PyString_GET_SIZE(self);
3424
3425 s = pad(self, fill, 0, '0');
3426
3427 if (s == NULL)
3428 return NULL;
3429
3430 p = PyString_AS_STRING(s);
3431 if (p[fill] == '+' || p[fill] == '-') {
3432 /* move sign to beginning of string */
3433 p[0] = p[fill];
3434 p[fill] = '0';
3435 }
3436
3437 return (PyObject*) s;
3438}
3439
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003440PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003441"S.isspace() -> bool\n\
3442\n\
3443Return True if all characters in S are whitespace\n\
3444and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003445
3446static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003447string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003448{
Fred Drakeba096332000-07-09 07:04:36 +00003449 register const unsigned char *p
3450 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003451 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003452
Guido van Rossum4c08d552000-03-10 22:55:18 +00003453 /* Shortcut for single character strings */
3454 if (PyString_GET_SIZE(self) == 1 &&
3455 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003456 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003457
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003458 /* Special case for empty strings */
3459 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003460 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003461
Guido van Rossum4c08d552000-03-10 22:55:18 +00003462 e = p + PyString_GET_SIZE(self);
3463 for (; p < e; p++) {
3464 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003465 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003466 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003467 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003468}
3469
3470
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003471PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003472"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003473\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003474Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003475and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003476
3477static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003478string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003479{
Fred Drakeba096332000-07-09 07:04:36 +00003480 register const unsigned char *p
3481 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003482 register const unsigned char *e;
3483
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003484 /* Shortcut for single character strings */
3485 if (PyString_GET_SIZE(self) == 1 &&
3486 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003487 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003488
3489 /* Special case for empty strings */
3490 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003491 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003492
3493 e = p + PyString_GET_SIZE(self);
3494 for (; p < e; p++) {
3495 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003496 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003497 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003498 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003499}
3500
3501
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003502PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003503"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003504\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003505Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003506and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003507
3508static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003509string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003510{
Fred Drakeba096332000-07-09 07:04:36 +00003511 register const unsigned char *p
3512 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003513 register const unsigned char *e;
3514
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003515 /* Shortcut for single character strings */
3516 if (PyString_GET_SIZE(self) == 1 &&
3517 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003518 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003519
3520 /* Special case for empty strings */
3521 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003522 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003523
3524 e = p + PyString_GET_SIZE(self);
3525 for (; p < e; p++) {
3526 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003527 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003528 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003529 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003530}
3531
3532
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003533PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003534"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003535\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003536Return True if all characters in S are digits\n\
3537and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003538
3539static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003540string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003541{
Fred Drakeba096332000-07-09 07:04:36 +00003542 register const unsigned char *p
3543 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003544 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003545
Guido van Rossum4c08d552000-03-10 22:55:18 +00003546 /* Shortcut for single character strings */
3547 if (PyString_GET_SIZE(self) == 1 &&
3548 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003550
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003551 /* Special case for empty strings */
3552 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003553 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003554
Guido van Rossum4c08d552000-03-10 22:55:18 +00003555 e = p + PyString_GET_SIZE(self);
3556 for (; p < e; p++) {
3557 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003558 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003559 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003560 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561}
3562
3563
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003564PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003565"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003566\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003567Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003568at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003569
3570static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003571string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003572{
Fred Drakeba096332000-07-09 07:04:36 +00003573 register const unsigned char *p
3574 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003575 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003576 int cased;
3577
Guido van Rossum4c08d552000-03-10 22:55:18 +00003578 /* Shortcut for single character strings */
3579 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003581
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003582 /* Special case for empty strings */
3583 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003584 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003585
Guido van Rossum4c08d552000-03-10 22:55:18 +00003586 e = p + PyString_GET_SIZE(self);
3587 cased = 0;
3588 for (; p < e; p++) {
3589 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003590 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003591 else if (!cased && islower(*p))
3592 cased = 1;
3593 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003594 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003595}
3596
3597
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003598PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003599"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003600\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003601Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003602at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003603
3604static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003605string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003606{
Fred Drakeba096332000-07-09 07:04:36 +00003607 register const unsigned char *p
3608 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003609 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003610 int cased;
3611
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612 /* Shortcut for single character strings */
3613 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003614 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003615
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003616 /* Special case for empty strings */
3617 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003618 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003619
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620 e = p + PyString_GET_SIZE(self);
3621 cased = 0;
3622 for (; p < e; p++) {
3623 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003625 else if (!cased && isupper(*p))
3626 cased = 1;
3627 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003628 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629}
3630
3631
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003632PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003633"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003634\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003635Return True if S is a titlecased string and there is at least one\n\
3636character in S, i.e. uppercase characters may only follow uncased\n\
3637characters and lowercase characters only cased ones. Return False\n\
3638otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003639
3640static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003641string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003642{
Fred Drakeba096332000-07-09 07:04:36 +00003643 register const unsigned char *p
3644 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003645 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646 int cased, previous_is_cased;
3647
Guido van Rossum4c08d552000-03-10 22:55:18 +00003648 /* Shortcut for single character strings */
3649 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003650 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003651
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003652 /* Special case for empty strings */
3653 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003654 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003655
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656 e = p + PyString_GET_SIZE(self);
3657 cased = 0;
3658 previous_is_cased = 0;
3659 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003660 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661
3662 if (isupper(ch)) {
3663 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003664 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665 previous_is_cased = 1;
3666 cased = 1;
3667 }
3668 else if (islower(ch)) {
3669 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003670 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671 previous_is_cased = 1;
3672 cased = 1;
3673 }
3674 else
3675 previous_is_cased = 0;
3676 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003677 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003678}
3679
3680
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003681PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003682"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683\n\
3684Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003685Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003686is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688static PyObject*
3689string_splitlines(PyStringObject *self, PyObject *args)
3690{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003691 register Py_ssize_t i;
3692 register Py_ssize_t j;
3693 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003694 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695 PyObject *list;
3696 PyObject *str;
3697 char *data;
3698
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003699 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700 return NULL;
3701
3702 data = PyString_AS_STRING(self);
3703 len = PyString_GET_SIZE(self);
3704
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705 list = PyList_New(0);
3706 if (!list)
3707 goto onError;
3708
3709 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003710 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003711
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 /* Find a line and append it */
3713 while (i < len && data[i] != '\n' && data[i] != '\r')
3714 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715
3716 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003717 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003718 if (i < len) {
3719 if (data[i] == '\r' && i + 1 < len &&
3720 data[i+1] == '\n')
3721 i += 2;
3722 else
3723 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003724 if (keepends)
3725 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003726 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003727 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728 j = i;
3729 }
3730 if (j < len) {
3731 SPLIT_APPEND(data, j, len);
3732 }
3733
3734 return list;
3735
3736 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003737 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738 return NULL;
3739}
3740
3741#undef SPLIT_APPEND
3742
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003743static PyObject *
3744string_getnewargs(PyStringObject *v)
3745{
3746 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3747}
3748
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003749
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003750static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003751string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752 /* Counterparts of the obsolete stropmodule functions; except
3753 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003754 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3755 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003756 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003757 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3758 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003759 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3760 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3761 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3762 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3763 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3764 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3765 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003766 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3767 capitalize__doc__},
3768 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3769 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3770 endswith__doc__},
3771 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3772 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3773 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3774 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3775 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3776 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3777 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3778 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3779 startswith__doc__},
3780 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3781 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3782 swapcase__doc__},
3783 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3784 translate__doc__},
3785 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3786 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3787 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3788 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3789 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3790 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3791 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3792 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3793 expandtabs__doc__},
3794 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3795 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003796 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003797 {NULL, NULL} /* sentinel */
3798};
3799
Jeremy Hylton938ace62002-07-17 16:30:39 +00003800static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003801str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3802
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003803static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003804string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003805{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003806 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003807 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003808
Guido van Rossumae960af2001-08-30 03:11:59 +00003809 if (type != &PyString_Type)
3810 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003811 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3812 return NULL;
3813 if (x == NULL)
3814 return PyString_FromString("");
3815 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003816}
3817
Guido van Rossumae960af2001-08-30 03:11:59 +00003818static PyObject *
3819str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3820{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003821 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003822 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003823
3824 assert(PyType_IsSubtype(type, &PyString_Type));
3825 tmp = string_new(&PyString_Type, args, kwds);
3826 if (tmp == NULL)
3827 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003828 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003829 n = PyString_GET_SIZE(tmp);
3830 pnew = type->tp_alloc(type, n);
3831 if (pnew != NULL) {
3832 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003833 ((PyStringObject *)pnew)->ob_shash =
3834 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003835 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003836 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003837 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003838 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003839}
3840
Guido van Rossumcacfc072002-05-24 19:01:59 +00003841static PyObject *
3842basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3843{
3844 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003845 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003846 return NULL;
3847}
3848
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003849static PyObject *
3850string_mod(PyObject *v, PyObject *w)
3851{
3852 if (!PyString_Check(v)) {
3853 Py_INCREF(Py_NotImplemented);
3854 return Py_NotImplemented;
3855 }
3856 return PyString_Format(v, w);
3857}
3858
/* Docstring used as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003861
/* Number protocol table referenced by PyString_Type.tp_as_number.  Only
   nb_remainder is filled in (string_mod, i.e. the `%' operator); the
   initializer stops there, so all later slots are zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                  /*nb_add*/
    0,                  /*nb_subtract*/
    0,                  /*nb_multiply*/
    0,                  /*nb_divide*/
    string_mod,         /*nb_remainder*/
};
3869
3870
/* Type object for `basestring'.  Almost every slot is empty: the type
   supplies a name, a docstring, PyBaseObject_Type as its base, the
   Py_TPFLAGS_BASETYPE flag, and a tp_new (basestring_new) that always
   fails with TypeError.  The initializer is positional, so the order of
   the entries must follow the PyTypeObject layout exactly. */
PyTypeObject PyBaseString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                          /* ob_size */
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3913
/* Docstring used as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003919
/* Type object for `str'.  Instances are variable-sized: the basic size is
   sizeof(PyStringObject) and each item is one char.  The type derives
   from PyBaseString_Type, is itself subclassable (Py_TPFLAGS_BASETYPE),
   gets its methods from string_methods and its `%' operator from
   string_as_number.  The initializer is positional, so the order of the
   entries must follow the PyTypeObject layout exactly. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                          /* ob_size */
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE,                    /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3963
3964void
Fred Drakeba096332000-07-09 07:04:36 +00003965PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003966{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003967 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003968 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003969 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003970 if (w == NULL || !PyString_Check(*pv)) {
3971 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003972 *pv = NULL;
3973 return;
3974 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003975 v = string_concat((PyStringObject *) *pv, w);
3976 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003977 *pv = v;
3978}
3979
/* Like PyString_Concat(pv, w), but additionally releases the caller's
   reference to w afterwards.  Py_XDECREF is used, so w may be NULL. */
void
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyString_Concat(pv, w);
    Py_XDECREF(w);
}
3986
3987
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003988/* The following function breaks the notion that strings are immutable:
3989 it changes the size of a string. We get away with this only if there
3990 is only one module referencing the object. You can also think of it
3991 as creating a new string object and destroying the old one, only
3992 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003993 already be known to some other part of the code...
3994 Note that if there's not enough memory to resize the string, the original
3995 string object at *pv is deallocated, *pv is set to NULL, an "out of
3996 memory" exception is set, and -1 is returned. Else (on success) 0 is
3997 returned, and the value in *pv may or may not be the same as on input.
3998 As always, an extra byte is allocated for a trailing \0 byte (newsize
3999 does *not* include that), and a trailing \0 byte is stored.
4000*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004001
4002int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004003_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004004{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004005 register PyObject *v;
4006 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004007 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004008 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4009 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004010 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004011 Py_DECREF(v);
4012 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004013 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004014 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004015 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004016 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004017 _Py_ForgetReference(v);
4018 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004019 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004020 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004021 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004022 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004023 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004025 _Py_NewReference(*pv);
4026 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004027 sv->ob_size = newsize;
4028 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004029 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004030 return 0;
4031}
Guido van Rossume5372401993-03-16 12:15:04 +00004032
4033/* Helpers for formatstring */
4034
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004035static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00004036getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004037{
Thomas Wouters977485d2006-02-16 15:59:12 +00004038 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004039 if (argidx < arglen) {
4040 (*p_argidx)++;
4041 if (arglen < 0)
4042 return args;
4043 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004044 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004045 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004046 PyErr_SetString(PyExc_TypeError,
4047 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004048 return NULL;
4049}
4050
/* Format codes
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 *
 * Each code is a distinct bit, so any combination can be carried in a
 * single int `flags' argument and tested with `&' (as the format helpers
 * below do with F_ALT).
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
4063
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004064static int
Fred Drakeba096332000-07-09 07:04:36 +00004065formatfloat(char *buf, size_t buflen, int flags,
4066 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004067{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004068 /* fmt = '%#.' + `prec` + `type`
4069 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004070 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004071 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004072 x = PyFloat_AsDouble(v);
4073 if (x == -1.0 && PyErr_Occurred()) {
4074 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004075 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004076 }
Guido van Rossume5372401993-03-16 12:15:04 +00004077 if (prec < 0)
4078 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004079 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4080 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004081 /* Worst case length calc to ensure no buffer overrun:
4082
4083 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004084 fmt = %#.<prec>g
4085 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004086 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004087 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004088
4089 'f' formats:
4090 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4091 len = 1 + 50 + 1 + prec = 52 + prec
4092
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004093 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004094 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004095
4096 */
4097 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4098 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004099 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004100 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004101 return -1;
4102 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004103 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4104 (flags&F_ALT) ? "#" : "",
4105 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004106 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004107 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004108}
4109
Tim Peters38fd5b62000-09-21 05:43:11 +00004110/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4111 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4112 * Python's regular ints.
4113 * Return value: a new PyString*, or NULL if error.
4114 * . *pbuf is set to point into it,
4115 * *plen set to the # of chars following that.
4116 * Caller must decref it when done using pbuf.
4117 * The string starting at *pbuf is of the form
4118 * "-"? ("0x" | "0X")? digit+
4119 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004120 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004121 * There will be at least prec digits, zero-filled on the left if
4122 * necessary to get that many.
4123 * val object to be converted
4124 * flags bitmask of format flags; only F_ALT is looked at
4125 * prec minimum number of digits; 0-fill on left if needed
4126 * type a character in [duoxX]; u acts the same as d
4127 *
4128 * CAUTION: o, x and X conversions on regular ints can never
4129 * produce a '-' sign, but can for Python's unbounded ints.
4130 */
4131PyObject*
4132_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4133 char **pbuf, int *plen)
4134{
4135 PyObject *result = NULL;
4136 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004137 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004138 int sign; /* 1 if '-', else 0 */
4139 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004140 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004141 int numdigits; /* len == numnondigits + numdigits */
4142 int numnondigits = 0;
4143
4144 switch (type) {
4145 case 'd':
4146 case 'u':
4147 result = val->ob_type->tp_str(val);
4148 break;
4149 case 'o':
4150 result = val->ob_type->tp_as_number->nb_oct(val);
4151 break;
4152 case 'x':
4153 case 'X':
4154 numnondigits = 2;
4155 result = val->ob_type->tp_as_number->nb_hex(val);
4156 break;
4157 default:
4158 assert(!"'type' not in [duoxX]");
4159 }
4160 if (!result)
4161 return NULL;
4162
4163 /* To modify the string in-place, there can only be one reference. */
4164 if (result->ob_refcnt != 1) {
4165 PyErr_BadInternalCall();
4166 return NULL;
4167 }
4168 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004169 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004170 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004171 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4172 return NULL;
4173 }
4174 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004175 if (buf[len-1] == 'L') {
4176 --len;
4177 buf[len] = '\0';
4178 }
4179 sign = buf[0] == '-';
4180 numnondigits += sign;
4181 numdigits = len - numnondigits;
4182 assert(numdigits > 0);
4183
Tim Petersfff53252001-04-12 18:38:48 +00004184 /* Get rid of base marker unless F_ALT */
4185 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004186 /* Need to skip 0x, 0X or 0. */
4187 int skipped = 0;
4188 switch (type) {
4189 case 'o':
4190 assert(buf[sign] == '0');
4191 /* If 0 is only digit, leave it alone. */
4192 if (numdigits > 1) {
4193 skipped = 1;
4194 --numdigits;
4195 }
4196 break;
4197 case 'x':
4198 case 'X':
4199 assert(buf[sign] == '0');
4200 assert(buf[sign + 1] == 'x');
4201 skipped = 2;
4202 numnondigits -= 2;
4203 break;
4204 }
4205 if (skipped) {
4206 buf += skipped;
4207 len -= skipped;
4208 if (sign)
4209 buf[0] = '-';
4210 }
4211 assert(len == numnondigits + numdigits);
4212 assert(numdigits > 0);
4213 }
4214
4215 /* Fill with leading zeroes to meet minimum width. */
4216 if (prec > numdigits) {
4217 PyObject *r1 = PyString_FromStringAndSize(NULL,
4218 numnondigits + prec);
4219 char *b1;
4220 if (!r1) {
4221 Py_DECREF(result);
4222 return NULL;
4223 }
4224 b1 = PyString_AS_STRING(r1);
4225 for (i = 0; i < numnondigits; ++i)
4226 *b1++ = *buf++;
4227 for (i = 0; i < prec - numdigits; i++)
4228 *b1++ = '0';
4229 for (i = 0; i < numdigits; i++)
4230 *b1++ = *buf++;
4231 *b1 = '\0';
4232 Py_DECREF(result);
4233 result = r1;
4234 buf = PyString_AS_STRING(result);
4235 len = numnondigits + prec;
4236 }
4237
4238 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004239 if (type == 'X') {
4240 /* Need to convert all lower case letters to upper case.
4241 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004242 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004243 if (buf[i] >= 'a' && buf[i] <= 'x')
4244 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004245 }
4246 *pbuf = buf;
4247 *plen = len;
4248 return result;
4249}
4250
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004251static int
Fred Drakeba096332000-07-09 07:04:36 +00004252formatint(char *buf, size_t buflen, int flags,
4253 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004254{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004255 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004256 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4257 + 1 + 1 = 24 */
4258 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004259 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004260 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004261
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004262 x = PyInt_AsLong(v);
4263 if (x == -1 && PyErr_Occurred()) {
4264 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004265 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004266 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004267 if (x < 0 && type == 'u') {
4268 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004269 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004270 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4271 sign = "-";
4272 else
4273 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004274 if (prec < 0)
4275 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004276
4277 if ((flags & F_ALT) &&
4278 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004279 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004280 * of issues that cause pain:
4281 * - when 0 is being converted, the C standard leaves off
4282 * the '0x' or '0X', which is inconsistent with other
4283 * %#x/%#X conversions and inconsistent with Python's
4284 * hex() function
4285 * - there are platforms that violate the standard and
4286 * convert 0 with the '0x' or '0X'
4287 * (Metrowerks, Compaq Tru64)
4288 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004289 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004290 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004291 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004292 * We can achieve the desired consistency by inserting our
4293 * own '0x' or '0X' prefix, and substituting %x/%X in place
4294 * of %#x/%#X.
4295 *
4296 * Note that this is the same approach as used in
4297 * formatint() in unicodeobject.c
4298 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004299 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4300 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004301 }
4302 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004303 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4304 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004305 prec, type);
4306 }
4307
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004308 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4309 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004310 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004311 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004312 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004313 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004314 return -1;
4315 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004316 if (sign[0])
4317 PyOS_snprintf(buf, buflen, fmt, -x);
4318 else
4319 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004320 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004321}
4322
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004323static int
Fred Drakeba096332000-07-09 07:04:36 +00004324formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004325{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004326 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004327 if (PyString_Check(v)) {
4328 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004329 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004330 }
4331 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004332 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004333 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004334 }
4335 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004336 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004337}
4338
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast cannot bind to
   a neighbouring operator at the point of use (e.g. `sizeof FORMATBUFLEN').
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004348
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004349PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004350PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004351{
4352 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004353 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004354 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004355 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004356 PyObject *result, *orig_args;
4357#ifdef Py_USING_UNICODE
4358 PyObject *v, *w;
4359#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004360 PyObject *dict = NULL;
4361 if (format == NULL || !PyString_Check(format) || args == NULL) {
4362 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004363 return NULL;
4364 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004365 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004366 fmt = PyString_AS_STRING(format);
4367 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004368 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004369 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004370 if (result == NULL)
4371 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004372 res = PyString_AsString(result);
4373 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004374 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004375 argidx = 0;
4376 }
4377 else {
4378 arglen = -1;
4379 argidx = -2;
4380 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004381 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4382 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004383 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004384 while (--fmtcnt >= 0) {
4385 if (*fmt != '%') {
4386 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004387 rescnt = fmtcnt + 100;
4388 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004389 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004390 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004391 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004392 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004393 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004394 }
4395 *res++ = *fmt++;
4396 }
4397 else {
4398 /* Got a format specifier */
4399 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004400 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004401 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004402 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004403 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004404 PyObject *v = NULL;
4405 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004406 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004407 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004408 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004409 char formatbuf[FORMATBUFLEN];
4410 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004411#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004412 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004413 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004414#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004415
Guido van Rossumda9c2711996-12-05 21:58:58 +00004416 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004417 if (*fmt == '(') {
4418 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004419 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004420 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004421 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004422
4423 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004424 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004425 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004426 goto error;
4427 }
4428 ++fmt;
4429 --fmtcnt;
4430 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004431 /* Skip over balanced parentheses */
4432 while (pcount > 0 && --fmtcnt >= 0) {
4433 if (*fmt == ')')
4434 --pcount;
4435 else if (*fmt == '(')
4436 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004437 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004438 }
4439 keylen = fmt - keystart - 1;
4440 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004441 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004442 "incomplete format key");
4443 goto error;
4444 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004445 key = PyString_FromStringAndSize(keystart,
4446 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004447 if (key == NULL)
4448 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004449 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004450 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004451 args_owned = 0;
4452 }
4453 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004454 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004455 if (args == NULL) {
4456 goto error;
4457 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004458 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004459 arglen = -1;
4460 argidx = -2;
4461 }
Guido van Rossume5372401993-03-16 12:15:04 +00004462 while (--fmtcnt >= 0) {
4463 switch (c = *fmt++) {
4464 case '-': flags |= F_LJUST; continue;
4465 case '+': flags |= F_SIGN; continue;
4466 case ' ': flags |= F_BLANK; continue;
4467 case '#': flags |= F_ALT; continue;
4468 case '0': flags |= F_ZERO; continue;
4469 }
4470 break;
4471 }
4472 if (c == '*') {
4473 v = getnextarg(args, arglen, &argidx);
4474 if (v == NULL)
4475 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004476 if (!PyInt_Check(v)) {
4477 PyErr_SetString(PyExc_TypeError,
4478 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004479 goto error;
4480 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004481 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004482 if (width < 0) {
4483 flags |= F_LJUST;
4484 width = -width;
4485 }
Guido van Rossume5372401993-03-16 12:15:04 +00004486 if (--fmtcnt >= 0)
4487 c = *fmt++;
4488 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004489 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004490 width = c - '0';
4491 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004492 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004493 if (!isdigit(c))
4494 break;
4495 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004496 PyErr_SetString(
4497 PyExc_ValueError,
4498 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004499 goto error;
4500 }
4501 width = width*10 + (c - '0');
4502 }
4503 }
4504 if (c == '.') {
4505 prec = 0;
4506 if (--fmtcnt >= 0)
4507 c = *fmt++;
4508 if (c == '*') {
4509 v = getnextarg(args, arglen, &argidx);
4510 if (v == NULL)
4511 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004512 if (!PyInt_Check(v)) {
4513 PyErr_SetString(
4514 PyExc_TypeError,
4515 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004516 goto error;
4517 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004518 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004519 if (prec < 0)
4520 prec = 0;
4521 if (--fmtcnt >= 0)
4522 c = *fmt++;
4523 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004524 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004525 prec = c - '0';
4526 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004527 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004528 if (!isdigit(c))
4529 break;
4530 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004531 PyErr_SetString(
4532 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004533 "prec too big");
4534 goto error;
4535 }
4536 prec = prec*10 + (c - '0');
4537 }
4538 }
4539 } /* prec */
4540 if (fmtcnt >= 0) {
4541 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004542 if (--fmtcnt >= 0)
4543 c = *fmt++;
4544 }
4545 }
4546 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004547 PyErr_SetString(PyExc_ValueError,
4548 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004549 goto error;
4550 }
4551 if (c != '%') {
4552 v = getnextarg(args, arglen, &argidx);
4553 if (v == NULL)
4554 goto error;
4555 }
4556 sign = 0;
4557 fill = ' ';
4558 switch (c) {
4559 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004560 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004561 len = 1;
4562 break;
4563 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004564#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004565 if (PyUnicode_Check(v)) {
4566 fmt = fmt_start;
4567 argidx = argidx_start;
4568 goto unicode;
4569 }
Georg Brandld45014b2005-10-01 17:06:00 +00004570#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004571 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004572#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004573 if (temp != NULL && PyUnicode_Check(temp)) {
4574 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004575 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004576 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004577 goto unicode;
4578 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004579#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004580 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004581 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004582 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004583 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004584 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004585 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004586 if (!PyString_Check(temp)) {
4587 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004588 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004589 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004590 goto error;
4591 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004592 pbuf = PyString_AS_STRING(temp);
4593 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004594 if (prec >= 0 && len > prec)
4595 len = prec;
4596 break;
4597 case 'i':
4598 case 'd':
4599 case 'u':
4600 case 'o':
4601 case 'x':
4602 case 'X':
4603 if (c == 'i')
4604 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004605 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004606 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004607 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004608 prec, c, &pbuf, &ilen);
4609 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004610 if (!temp)
4611 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004612 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004613 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004614 else {
4615 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004616 len = formatint(pbuf,
4617 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004618 flags, prec, c, v);
4619 if (len < 0)
4620 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004621 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004622 }
4623 if (flags & F_ZERO)
4624 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004625 break;
4626 case 'e':
4627 case 'E':
4628 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004629 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004630 case 'g':
4631 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004632 if (c == 'F')
4633 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004634 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004635 len = formatfloat(pbuf, sizeof(formatbuf),
4636 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004637 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004638 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004639 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004640 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004641 fill = '0';
4642 break;
4643 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004644#ifdef Py_USING_UNICODE
4645 if (PyUnicode_Check(v)) {
4646 fmt = fmt_start;
4647 argidx = argidx_start;
4648 goto unicode;
4649 }
4650#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004651 pbuf = formatbuf;
4652 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004653 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004654 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004655 break;
4656 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004657 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004658 "unsupported format character '%c' (0x%x) "
4659 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004660 c, c,
4661 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004662 goto error;
4663 }
4664 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004665 if (*pbuf == '-' || *pbuf == '+') {
4666 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004667 len--;
4668 }
4669 else if (flags & F_SIGN)
4670 sign = '+';
4671 else if (flags & F_BLANK)
4672 sign = ' ';
4673 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004674 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004675 }
4676 if (width < len)
4677 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004678 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004679 reslen -= rescnt;
4680 rescnt = width + fmtcnt + 100;
4681 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004682 if (reslen < 0) {
4683 Py_DECREF(result);
4684 return PyErr_NoMemory();
4685 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004686 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004687 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004688 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004689 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004690 }
4691 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004692 if (fill != ' ')
4693 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004694 rescnt--;
4695 if (width > len)
4696 width--;
4697 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004698 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4699 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004700 assert(pbuf[1] == c);
4701 if (fill != ' ') {
4702 *res++ = *pbuf++;
4703 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004704 }
Tim Petersfff53252001-04-12 18:38:48 +00004705 rescnt -= 2;
4706 width -= 2;
4707 if (width < 0)
4708 width = 0;
4709 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004710 }
4711 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004712 do {
4713 --rescnt;
4714 *res++ = fill;
4715 } while (--width > len);
4716 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004717 if (fill == ' ') {
4718 if (sign)
4719 *res++ = sign;
4720 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004721 (c == 'x' || c == 'X')) {
4722 assert(pbuf[0] == '0');
4723 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004724 *res++ = *pbuf++;
4725 *res++ = *pbuf++;
4726 }
4727 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004728 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004729 res += len;
4730 rescnt -= len;
4731 while (--width >= len) {
4732 --rescnt;
4733 *res++ = ' ';
4734 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004735 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004736 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004737 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004738 goto error;
4739 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004740 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004741 } /* '%' */
4742 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004743 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004744 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004745 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004746 goto error;
4747 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004748 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004749 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004750 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004751 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004752 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004753
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004754#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004755 unicode:
4756 if (args_owned) {
4757 Py_DECREF(args);
4758 args_owned = 0;
4759 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004760 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004761 if (PyTuple_Check(orig_args) && argidx > 0) {
4762 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004763 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004764 v = PyTuple_New(n);
4765 if (v == NULL)
4766 goto error;
4767 while (--n >= 0) {
4768 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4769 Py_INCREF(w);
4770 PyTuple_SET_ITEM(v, n, w);
4771 }
4772 args = v;
4773 } else {
4774 Py_INCREF(orig_args);
4775 args = orig_args;
4776 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004777 args_owned = 1;
4778 /* Take what we have of the result and let the Unicode formatting
4779 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004780 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004781 if (_PyString_Resize(&result, rescnt))
4782 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004783 fmtcnt = PyString_GET_SIZE(format) - \
4784 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004785 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4786 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004787 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004788 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004789 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004790 if (v == NULL)
4791 goto error;
4792 /* Paste what we have (result) to what the Unicode formatting
4793 function returned (v) and return the result (or error) */
4794 w = PyUnicode_Concat(result, v);
4795 Py_DECREF(result);
4796 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004797 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004798 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004799#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004800
Guido van Rossume5372401993-03-16 12:15:04 +00004801 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004802 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004803 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004804 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004805 }
Guido van Rossume5372401993-03-16 12:15:04 +00004806 return NULL;
4807}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004808
Guido van Rossum2a61e741997-01-18 07:55:05 +00004809void
Fred Drakeba096332000-07-09 07:04:36 +00004810PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004811{
4812 register PyStringObject *s = (PyStringObject *)(*p);
4813 PyObject *t;
4814 if (s == NULL || !PyString_Check(s))
4815 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004816 /* If it's a string subclass, we don't really know what putting
4817 it in the interned dict might do. */
4818 if (!PyString_CheckExact(s))
4819 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004820 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004821 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004822 if (interned == NULL) {
4823 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004824 if (interned == NULL) {
4825 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004826 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004827 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004828 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004829 t = PyDict_GetItem(interned, (PyObject *)s);
4830 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004831 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004832 Py_DECREF(*p);
4833 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004834 return;
4835 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004836
Armin Rigo79f7ad22004-08-07 19:27:39 +00004837 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004838 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004839 return;
4840 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004841 /* The two references in interned are not counted by refcnt.
4842 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004843 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004844 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004845}
4846
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004847void
4848PyString_InternImmortal(PyObject **p)
4849{
4850 PyString_InternInPlace(p);
4851 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4852 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4853 Py_INCREF(*p);
4854 }
4855}
4856
Guido van Rossum2a61e741997-01-18 07:55:05 +00004857
4858PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004859PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004860{
4861 PyObject *s = PyString_FromString(cp);
4862 if (s == NULL)
4863 return NULL;
4864 PyString_InternInPlace(&s);
4865 return s;
4866}
4867
Guido van Rossum8cf04761997-08-02 02:57:45 +00004868void
Fred Drakeba096332000-07-09 07:04:36 +00004869PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004870{
4871 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004872 for (i = 0; i < UCHAR_MAX + 1; i++) {
4873 Py_XDECREF(characters[i]);
4874 characters[i] = NULL;
4875 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004876 Py_XDECREF(nullstring);
4877 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004878}
Barry Warsawa903ad982001-02-23 16:40:48 +00004879
Barry Warsawa903ad982001-02-23 16:40:48 +00004880void _Py_ReleaseInternedStrings(void)
4881{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004882 PyObject *keys;
4883 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004884 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004885
4886 if (interned == NULL || !PyDict_Check(interned))
4887 return;
4888 keys = PyDict_Keys(interned);
4889 if (keys == NULL || !PyList_Check(keys)) {
4890 PyErr_Clear();
4891 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004892 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004893
4894 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4895 detector, interned strings are not forcibly deallocated; rather, we
4896 give them their stolen references back, and then clear and DECREF
4897 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004898
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004899 fprintf(stderr, "releasing interned strings\n");
4900 n = PyList_GET_SIZE(keys);
4901 for (i = 0; i < n; i++) {
4902 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4903 switch (s->ob_sstate) {
4904 case SSTATE_NOT_INTERNED:
4905 /* XXX Shouldn't happen */
4906 break;
4907 case SSTATE_INTERNED_IMMORTAL:
4908 s->ob_refcnt += 1;
4909 break;
4910 case SSTATE_INTERNED_MORTAL:
4911 s->ob_refcnt += 2;
4912 break;
4913 default:
4914 Py_FatalError("Inconsistent interned string state.");
4915 }
4916 s->ob_sstate = SSTATE_NOT_INTERNED;
4917 }
4918 Py_DECREF(keys);
4919 PyDict_Clear(interned);
4920 Py_DECREF(interned);
4921 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004922}