blob: 92477eea0c71e8151cfad83d203297788f05e534 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000770/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
Fredrik Lundha50d2012006-05-26 17:04:58 +0000772#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000773
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000774#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000778
Fredrik Lundhb9479482006-05-26 17:22:38 +0000779#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000807#ifdef __VMS
808 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
809#else
810 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
811#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000812 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814
Thomas Wouters7e474022000-07-16 12:04:32 +0000815 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000817 if (memchr(op->ob_sval, '\'', op->ob_size) &&
818 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 quote = '"';
820
821 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822 for (i = 0; i < op->ob_size; i++) {
823 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000825 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000826 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000827 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000828 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000829 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000830 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000831 fprintf(fp, "\\r");
832 else if (c < ' ' || c >= 0x7f)
833 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000834 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000835 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000838 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000839}
840
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841PyObject *
842PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000844 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000845 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000846 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000847 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000848 PyErr_SetString(PyExc_OverflowError,
849 "string is too large to make repr");
850 }
851 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000853 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
855 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000856 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 register char c;
858 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000859 int quote;
860
Thomas Wouters7e474022000-07-16 12:04:32 +0000861 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000862 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000863 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000865 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000866 quote = '"';
867
Tim Peters9161c8b2001-12-03 01:55:38 +0000868 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000869 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000871 /* There's at least enough room for a hex escape
872 and a closing quote. */
873 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000875 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000877 else if (c == '\t')
878 *p++ = '\\', *p++ = 't';
879 else if (c == '\n')
880 *p++ = '\\', *p++ = 'n';
881 else if (c == '\r')
882 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000883 else if (c < ' ' || c >= 0x7f) {
884 /* For performance, we don't want to call
885 PyOS_snprintf here (extra layers of
886 function call). */
887 sprintf(p, "\\x%02x", c & 0xff);
888 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000889 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000890 else
891 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000893 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000894 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000896 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000897 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000898 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900}
901
Guido van Rossum189f1df2001-05-01 16:51:53 +0000902static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000903string_repr(PyObject *op)
904{
905 return PyString_Repr(op, 1);
906}
907
908static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000909string_str(PyObject *s)
910{
Tim Petersc9933152001-10-16 20:18:24 +0000911 assert(PyString_Check(s));
912 if (PyString_CheckExact(s)) {
913 Py_INCREF(s);
914 return s;
915 }
916 else {
917 /* Subtype -- return genuine string with the same value. */
918 PyStringObject *t = (PyStringObject *) s;
919 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
920 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921}
922
Martin v. Löwis18e16552006-02-15 17:27:45 +0000923static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000924string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925{
926 return a->ob_size;
927}
928
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000930string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000931{
Andrew Dalke598710c2006-05-25 18:18:39 +0000932 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000933 register PyStringObject *op;
934 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000935#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000936 if (PyUnicode_Check(bb))
937 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000938#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000939 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000940 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000941 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942 return NULL;
943 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000946 if ((a->ob_size == 0 || b->ob_size == 0) &&
947 PyString_CheckExact(a) && PyString_CheckExact(b)) {
948 if (a->ob_size == 0) {
949 Py_INCREF(bb);
950 return bb;
951 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
955 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000956 if (size < 0) {
957 PyErr_SetString(PyExc_OverflowError,
958 "strings are too large to concat");
959 return NULL;
960 }
961
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000962 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000963 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000969 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
970 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000971 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973#undef b
974}
975
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000977string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000979 register Py_ssize_t i;
980 register Py_ssize_t j;
981 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000983 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984 if (n < 0)
985 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000986 /* watch out for overflows: the size can overflow int,
987 * and the # of bytes needed can overflow size_t
988 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000990 if (n && size / n != a->ob_size) {
991 PyErr_SetString(PyExc_OverflowError,
992 "repeated string is too long");
993 return NULL;
994 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000995 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996 Py_INCREF(a);
997 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998 }
Tim Peterse7c05322004-06-27 17:24:49 +0000999 nbytes = (size_t)size;
1000 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001001 PyErr_SetString(PyExc_OverflowError,
1002 "repeated string is too long");
1003 return NULL;
1004 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001006 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001007 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001009 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001010 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001011 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001012 op->ob_sval[size] = '\0';
1013 if (a->ob_size == 1 && n > 0) {
1014 memset(op->ob_sval, a->ob_sval[0] , n);
1015 return (PyObject *) op;
1016 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001017 i = 0;
1018 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001019 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001020 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001021 }
1022 while (i < size) {
1023 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001024 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001025 i += j;
1026 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028}
1029
1030/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1031
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001032static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001033string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001035 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036{
1037 if (i < 0)
1038 i = 0;
1039 if (j < 0)
1040 j = 0; /* Avoid signed/unsigned bug in next line */
1041 if (j > a->ob_size)
1042 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001043 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1044 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001045 Py_INCREF(a);
1046 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001047 }
1048 if (j < i)
1049 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001050 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
Guido van Rossum9284a572000-03-07 15:53:43 +00001053static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001054string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001055{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001056 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001057#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001058 if (PyUnicode_Check(sub_obj))
1059 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001060#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001061 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001062 PyErr_SetString(PyExc_TypeError,
1063 "'in <string>' requires string as left operand");
1064 return -1;
1065 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001066 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001067
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001068 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001069}
1070
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001071static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001072string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001074 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001075 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001076 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001077 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 return NULL;
1079 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001080 pchar = a->ob_sval[i];
1081 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001082 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001083 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001084 else {
1085#ifdef COUNT_ALLOCS
1086 one_strings++;
1087#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001088 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001089 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001090 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091}
1092
Martin v. Löwiscd353062001-05-24 16:56:35 +00001093static PyObject*
1094string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001095{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001096 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001097 Py_ssize_t len_a, len_b;
1098 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001099 PyObject *result;
1100
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001101 /* Make sure both arguments are strings. */
1102 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001103 result = Py_NotImplemented;
1104 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001105 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001106 if (a == b) {
1107 switch (op) {
1108 case Py_EQ:case Py_LE:case Py_GE:
1109 result = Py_True;
1110 goto out;
1111 case Py_NE:case Py_LT:case Py_GT:
1112 result = Py_False;
1113 goto out;
1114 }
1115 }
1116 if (op == Py_EQ) {
1117 /* Supporting Py_NE here as well does not save
1118 much time, since Py_NE is rarely used. */
1119 if (a->ob_size == b->ob_size
1120 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001121 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001122 a->ob_size) == 0)) {
1123 result = Py_True;
1124 } else {
1125 result = Py_False;
1126 }
1127 goto out;
1128 }
1129 len_a = a->ob_size; len_b = b->ob_size;
1130 min_len = (len_a < len_b) ? len_a : len_b;
1131 if (min_len > 0) {
1132 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1133 if (c==0)
1134 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1135 }else
1136 c = 0;
1137 if (c == 0)
1138 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139 switch (op) {
1140 case Py_LT: c = c < 0; break;
1141 case Py_LE: c = c <= 0; break;
1142 case Py_EQ: assert(0); break; /* unreachable */
1143 case Py_NE: c = c != 0; break;
1144 case Py_GT: c = c > 0; break;
1145 case Py_GE: c = c >= 0; break;
1146 default:
1147 result = Py_NotImplemented;
1148 goto out;
1149 }
1150 result = c ? Py_True : Py_False;
1151 out:
1152 Py_INCREF(result);
1153 return result;
1154}
1155
1156int
1157_PyString_Eq(PyObject *o1, PyObject *o2)
1158{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001159 PyStringObject *a = (PyStringObject*) o1;
1160 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001161 return a->ob_size == b->ob_size
1162 && *a->ob_sval == *b->ob_sval
1163 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001164}
1165
Guido van Rossum9bfef441993-03-29 10:43:31 +00001166static long
Fred Drakeba096332000-07-09 07:04:36 +00001167string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001168{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001169 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001170 register unsigned char *p;
1171 register long x;
1172
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001173 if (a->ob_shash != -1)
1174 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001175 len = a->ob_size;
1176 p = (unsigned char *) a->ob_sval;
1177 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001179 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180 x ^= a->ob_size;
1181 if (x == -1)
1182 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001183 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001184 return x;
1185}
1186
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001187#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1188
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001189static PyObject*
1190string_subscript(PyStringObject* self, PyObject* item)
1191{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001192 PyNumberMethods *nb = item->ob_type->tp_as_number;
1193 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1194 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001195 if (i == -1 && PyErr_Occurred())
1196 return NULL;
1197 if (i < 0)
1198 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001199 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001200 }
1201 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001202 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001203 char* source_buf;
1204 char* result_buf;
1205 PyObject* result;
1206
Tim Petersae1d0c92006-03-17 03:29:34 +00001207 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001208 PyString_GET_SIZE(self),
1209 &start, &stop, &step, &slicelength) < 0) {
1210 return NULL;
1211 }
1212
1213 if (slicelength <= 0) {
1214 return PyString_FromStringAndSize("", 0);
1215 }
1216 else {
1217 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001218 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001219 if (result_buf == NULL)
1220 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001221
Tim Petersae1d0c92006-03-17 03:29:34 +00001222 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 cur += step, i++) {
1224 result_buf[i] = source_buf[cur];
1225 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001226
1227 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 slicelength);
1229 PyMem_Free(result_buf);
1230 return result;
1231 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001232 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001233 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001234 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001235 "string indices must be integers");
1236 return NULL;
1237 }
1238}
1239
Martin v. Löwis18e16552006-02-15 17:27:45 +00001240static Py_ssize_t
1241string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001242{
1243 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001244 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001245 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001246 return -1;
1247 }
1248 *ptr = (void *)self->ob_sval;
1249 return self->ob_size;
1250}
1251
Martin v. Löwis18e16552006-02-15 17:27:45 +00001252static Py_ssize_t
1253string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254{
Guido van Rossum045e6881997-09-08 18:30:11 +00001255 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001256 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257 return -1;
1258}
1259
Martin v. Löwis18e16552006-02-15 17:27:45 +00001260static Py_ssize_t
1261string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001262{
1263 if ( lenp )
1264 *lenp = self->ob_size;
1265 return 1;
1266}
1267
Martin v. Löwis18e16552006-02-15 17:27:45 +00001268static Py_ssize_t
1269string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001270{
1271 if ( index != 0 ) {
1272 PyErr_SetString(PyExc_SystemError,
1273 "accessing non-existent string segment");
1274 return -1;
1275 }
1276 *ptr = self->ob_sval;
1277 return self->ob_size;
1278}
1279
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001280static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001282 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001283 (ssizeargfunc)string_repeat, /*sq_repeat*/
1284 (ssizeargfunc)string_item, /*sq_item*/
1285 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001286 0, /*sq_ass_item*/
1287 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001288 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001289};
1290
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001291static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001293 (binaryfunc)string_subscript,
1294 0,
1295};
1296
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001298 (readbufferproc)string_buffer_getreadbuf,
1299 (writebufferproc)string_buffer_getwritebuf,
1300 (segcountproc)string_buffer_getsegcount,
1301 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001302};
1303
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304
1305
/* Selectors for the three strip variants; they index stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* The format string for selector i with its 3-character "|O:" prefix
   skipped, i.e. just the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001314
Andrew Dalke525eab32006-05-26 14:00:45 +00001315
/* Nonzero if the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly before memcmp() is called on the bytes in between.

   Don't call if length < 2: the last-byte index and the memcmp() length
   are computed as length-1 and length-2.

   Every argument is parenthesized so that expression arguments (for
   example `p + 1` as target) expand correctly.  Arguments are evaluated
   more than once, so they must not have side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)		\
	((target)[(offset)] == (pattern)[0] &&				\
	 (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&	\
	 !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1321
1322
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits:
   maxsplit+1 elements (5 splits gives 6 elements), capped at MAX_PREALLOC.
   The argument is parenthesized so that expression arguments are
   evaluated as a unit; it is evaluated twice. */
#define PREALLOC_SIZE(maxsplit) \
	((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1335
/* Create a string from data[left:right] and append it to `list`.

   Uses the caller's locals `str` and `list` and jumps to the caller's
   `onError` label if creating or appending the string fails.  The
   temporary reference held in `str` is released in either case.

   NOTE: this expands to several statements, not a single one, so it must
   not be used as the unbraced body of an if/else/loop. */
#define SPLIT_APPEND(data, left, right)				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (PyList_Append(list, str)) {				\
		Py_DECREF(str);					\
		goto onError;					\
	}							\
	else							\
		Py_DECREF(str);
/* Create a string from data[left:right] and add it to `list` as element
   number `count`, then increment `count`.

   While count < MAX_PREALLOC the string is stored directly with
   PyList_SET_ITEM(); after that it is appended with PyList_Append() and
   the temporary reference is released.  Uses the caller's locals `str`,
   `list` and `count`, and jumps to the caller's `onError` label on failure.

   NOTE(review): the direct store presumably relies on the list having been
   created with PyList_New(PREALLOC_SIZE(...)) so that slot `count` exists --
   confirm for any new caller. */
#define SPLIT_ADD(data, left, right) {				\
	str = PyString_FromStringAndSize((data) + (left),	\
					 (right) - (left));	\
	if (str == NULL)					\
		goto onError;					\
	if (count < MAX_PREALLOC) {				\
		PyList_SET_ITEM(list, count, str);		\
	} else {						\
		if (PyList_Append(list, str)) {			\
			Py_DECREF(str);				\
			goto onError;				\
		}						\
		else						\
			Py_DECREF(str);				\
	}							\
	count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001364
/* Always force the list to the expected size: overwrite the list's
   ob_size with the caller's local `count`. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)list)->ob_size = count
Andrew Dalke525eab32006-05-26 14:00:45 +00001367
/* Index-scanning helpers.  Each one advances (or, for the RSKIP_ forms,
   retreats) the index variable `i` over the buffer `s` while the byte at
   s[i] is / is not whitespace according to isspace().

   The forward forms stop when i reaches `len`; the reverse forms stop
   when i drops below 0.  `i` must be a modifiable variable.  All
   arguments are parenthesized so that expression arguments (for example
   `p + 1` as the buffer) expand correctly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1372
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001373Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001374split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375{
Andrew Dalke525eab32006-05-26 14:00:45 +00001376 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001377 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001378 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379
1380 if (list == NULL)
1381 return NULL;
1382
Andrew Dalke02758d62006-05-26 15:21:01 +00001383 i = j = 0;
1384
1385 while (maxsplit-- > 0) {
1386 SKIP_SPACE(s, i, len);
1387 if (i==len) break;
1388 j = i; i++;
1389 SKIP_NONSPACE(s, i, len);
1390 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001392
1393 if (i < len) {
1394 /* Only occurs when maxsplit was reached */
1395 /* Skip any remaining whitespace and copy to end of string */
1396 SKIP_SPACE(s, i, len);
1397 if (i != len)
1398 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001399 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001400 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001402 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 Py_DECREF(list);
1404 return NULL;
1405}
1406
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001407Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001408split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001409{
Andrew Dalke525eab32006-05-26 14:00:45 +00001410 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001412 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001413
1414 if (list == NULL)
1415 return NULL;
1416
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001417 i = j = 0;
1418 while ((j < len) && (maxcount-- > 0)) {
1419 for(; j<len; j++) {
1420 /* I found that using memchr makes no difference */
1421 if (s[j] == ch) {
1422 SPLIT_ADD(s, i, j);
1423 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001424 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001425 }
1426 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001427 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001428 if (i <= len) {
1429 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001431 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001432 return list;
1433
1434 onError:
1435 Py_DECREF(list);
1436 return NULL;
1437}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001439PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440"S.split([sep [,maxsplit]]) -> list of strings\n\
1441\n\
1442Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001443delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001444splits are done. If sep is not specified or is None, any\n\
1445whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001446
1447static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001448string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001450 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001451 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001452 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001453 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001454#ifdef USE_FAST
1455 Py_ssize_t pos;
1456#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457
Martin v. Löwis9c830762006-04-13 08:37:17 +00001458 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001460 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001461 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001462 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001463 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001464 if (PyString_Check(subobj)) {
1465 sub = PyString_AS_STRING(subobj);
1466 n = PyString_GET_SIZE(subobj);
1467 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001468#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001469 else if (PyUnicode_Check(subobj))
1470 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001471#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1473 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001474
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475 if (n == 0) {
1476 PyErr_SetString(PyExc_ValueError, "empty separator");
1477 return NULL;
1478 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001479 else if (n == 1)
1480 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481
Andrew Dalke525eab32006-05-26 14:00:45 +00001482 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001483 if (list == NULL)
1484 return NULL;
1485
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001486#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001488 while (maxsplit-- > 0) {
1489 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1490 if (pos < 0)
1491 break;
1492 j = i+pos;
1493 SPLIT_ADD(s, i, j);
1494 i = j + n;
1495
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001497#else
1498 i = j = 0;
1499 while ((j+n <= len) && (maxsplit-- > 0)) {
1500 for (; j+n <= len; j++) {
1501 if (Py_STRING_MATCH(s, j, sub, n)) {
1502 SPLIT_ADD(s, i, j);
1503 i = j = j + n;
1504 break;
1505 }
1506 }
1507 }
1508#endif
1509 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001510 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511 return list;
1512
Andrew Dalke525eab32006-05-26 14:00:45 +00001513 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001514 Py_DECREF(list);
1515 return NULL;
1516}
1517
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001518PyDoc_STRVAR(partition__doc__,
1519"S.partition(sep) -> (head, sep, tail)\n\
1520\n\
1521Searches for the separator sep in S, and returns the part before it,\n\
1522the separator itself, and the part after it. If the separator is not\n\
1523found, returns S and two empty strings.");
1524
1525static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001526string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001527{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001528 const char *sep;
1529 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001530
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001531 if (PyString_Check(sep_obj)) {
1532 sep = PyString_AS_STRING(sep_obj);
1533 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001534 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001535#ifdef Py_USING_UNICODE
1536 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001537 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001538#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001539 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001540 return NULL;
1541
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001542 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001543 (PyObject*) self,
1544 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1545 sep_obj, sep, sep_len
1546 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001547}
1548
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001549PyDoc_STRVAR(rpartition__doc__,
1550"S.rpartition(sep) -> (head, sep, tail)\n\
1551\n\
1552Searches for the separator sep in S, starting at the end of S, and returns\n\
1553the part before it, the separator itself, and the part after it. If the\n\
1554separator is not found, returns S and two empty strings.");
1555
1556static PyObject *
1557string_rpartition(PyStringObject *self, PyObject *sep_obj)
1558{
1559 const char *sep;
1560 Py_ssize_t sep_len;
1561
1562 if (PyString_Check(sep_obj)) {
1563 sep = PyString_AS_STRING(sep_obj);
1564 sep_len = PyString_GET_SIZE(sep_obj);
1565 }
1566#ifdef Py_USING_UNICODE
1567 else if (PyUnicode_Check(sep_obj))
1568 return PyUnicode_Partition((PyObject *) self, sep_obj);
1569#endif
1570 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1571 return NULL;
1572
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001573 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001574 (PyObject*) self,
1575 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1576 sep_obj, sep, sep_len
1577 );
1578}
1579
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001580Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001581rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001582{
Andrew Dalke525eab32006-05-26 14:00:45 +00001583 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001584 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001585 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001586
1587 if (list == NULL)
1588 return NULL;
1589
Andrew Dalke02758d62006-05-26 15:21:01 +00001590 i = j = len-1;
1591
1592 while (maxsplit-- > 0) {
1593 RSKIP_SPACE(s, i);
1594 if (i<0) break;
1595 j = i; i--;
1596 RSKIP_NONSPACE(s, i);
1597 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001598 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001599 if (i >= 0) {
1600 /* Only occurs when maxsplit was reached */
1601 /* Skip any remaining whitespace and copy to beginning of string */
1602 RSKIP_SPACE(s, i);
1603 if (i >= 0)
1604 SPLIT_ADD(s, 0, i + 1);
1605
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001606 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001607 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001608 if (PyList_Reverse(list) < 0)
1609 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001610 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001611 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001612 Py_DECREF(list);
1613 return NULL;
1614}
1615
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001616Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001617rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001618{
Andrew Dalke525eab32006-05-26 14:00:45 +00001619 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001620 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001621 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001622
1623 if (list == NULL)
1624 return NULL;
1625
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001626 i = j = len - 1;
1627 while ((i >= 0) && (maxcount-- > 0)) {
1628 for (; i >= 0; i--) {
1629 if (s[i] == ch) {
1630 SPLIT_ADD(s, i + 1, j + 1);
1631 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001632 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001633 }
1634 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001635 }
1636 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001637 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001638 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001639 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001640 if (PyList_Reverse(list) < 0)
1641 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001642 return list;
1643
1644 onError:
1645 Py_DECREF(list);
1646 return NULL;
1647}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001648
1649PyDoc_STRVAR(rsplit__doc__,
1650"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1651\n\
1652Return a list of the words in the string S, using sep as the\n\
1653delimiter string, starting at the end of the string and working\n\
1654to the front. If maxsplit is given, at most maxsplit splits are\n\
1655done. If sep is not specified or is None, any whitespace string\n\
1656is a separator.");
1657
1658static PyObject *
1659string_rsplit(PyStringObject *self, PyObject *args)
1660{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001661 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001662 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001663 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001664 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001665
Martin v. Löwis9c830762006-04-13 08:37:17 +00001666 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001667 return NULL;
1668 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001669 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001670 if (subobj == Py_None)
1671 return rsplit_whitespace(s, len, maxsplit);
1672 if (PyString_Check(subobj)) {
1673 sub = PyString_AS_STRING(subobj);
1674 n = PyString_GET_SIZE(subobj);
1675 }
1676#ifdef Py_USING_UNICODE
1677 else if (PyUnicode_Check(subobj))
1678 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1679#endif
1680 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1681 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001682
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001683 if (n == 0) {
1684 PyErr_SetString(PyExc_ValueError, "empty separator");
1685 return NULL;
1686 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001687 else if (n == 1)
1688 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001689
Andrew Dalke525eab32006-05-26 14:00:45 +00001690 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001691 if (list == NULL)
1692 return NULL;
1693
1694 j = len;
1695 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001696
1697 while ( (i >= 0) && (maxsplit-- > 0) ) {
1698 for (; i>=0; i--) {
1699 if (Py_STRING_MATCH(s, i, sub, n)) {
1700 SPLIT_ADD(s, i + n, j);
1701 j = i;
1702 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001703 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001704 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001705 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001706 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001707 SPLIT_ADD(s, 0, j);
1708 FIX_PREALLOC_SIZE(list);
1709 if (PyList_Reverse(list) < 0)
1710 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001711 return list;
1712
Andrew Dalke525eab32006-05-26 14:00:45 +00001713onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001714 Py_DECREF(list);
1715 return NULL;
1716}
1717
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001719PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001720"S.join(sequence) -> string\n\
1721\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001722Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001723sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724
1725static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001726string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727{
1728 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001729 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001732 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001733 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001734 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001735 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736
Tim Peters19fe14e2001-01-19 03:03:47 +00001737 seq = PySequence_Fast(orig, "");
1738 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001739 return NULL;
1740 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001741
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001742 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001743 if (seqlen == 0) {
1744 Py_DECREF(seq);
1745 return PyString_FromString("");
1746 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001748 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001749 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1750 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001751 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001752 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001753 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001755
Raymond Hettinger674f2412004-08-23 23:23:54 +00001756 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001757 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001758 * Do a pre-pass to figure out the total amount of space we'll
1759 * need (sz), see whether any argument is absurd, and defer to
1760 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001761 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001762 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001763 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001764 item = PySequence_Fast_GET_ITEM(seq, i);
1765 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001766#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001767 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001768 /* Defer to Unicode join.
1769 * CAUTION: There's no gurantee that the
1770 * original sequence can be iterated over
1771 * again, so we must pass seq here.
1772 */
1773 PyObject *result;
1774 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001775 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001776 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001777 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001778#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001779 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001780 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001781 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001782 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001783 Py_DECREF(seq);
1784 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001785 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001786 sz += PyString_GET_SIZE(item);
1787 if (i != 0)
1788 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001789 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001790 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001791 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001792 Py_DECREF(seq);
1793 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001795 }
1796
1797 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001798 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001799 if (res == NULL) {
1800 Py_DECREF(seq);
1801 return NULL;
1802 }
1803
1804 /* Catenate everything. */
1805 p = PyString_AS_STRING(res);
1806 for (i = 0; i < seqlen; ++i) {
1807 size_t n;
1808 item = PySequence_Fast_GET_ITEM(seq, i);
1809 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001810 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001811 p += n;
1812 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001813 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001814 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001815 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001817
Jeremy Hylton49048292000-07-11 03:28:17 +00001818 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820}
1821
Tim Peters52e155e2001-06-16 05:42:57 +00001822PyObject *
1823_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001824{
Tim Petersa7259592001-06-16 05:11:17 +00001825 assert(sep != NULL && PyString_Check(sep));
1826 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001827 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001828}
1829
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001830Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001831string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001832{
1833 if (*end > len)
1834 *end = len;
1835 else if (*end < 0)
1836 *end += len;
1837 if (*end < 0)
1838 *end = 0;
1839 if (*start < 0)
1840 *start += len;
1841 if (*start < 0)
1842 *start = 0;
1843}
1844
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001845Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001846string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001848 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001849 const char *sub;
1850 Py_ssize_t sub_len;
1851 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001852
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001853 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1854 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001855 return -2;
1856 if (PyString_Check(subobj)) {
1857 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001858 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001860#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001861 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001862 return PyUnicode_Find(
1863 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001864#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001865 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001866 /* XXX - the "expected a character buffer object" is pretty
1867 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868 return -2;
1869
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001870 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001871 return stringlib_find_slice(
1872 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1873 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001874 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001875 return stringlib_rfind_slice(
1876 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1877 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878}
1879
1880
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001881PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882"S.find(sub [,start [,end]]) -> int\n\
1883\n\
1884Return the lowest index in S where substring sub is found,\n\
1885such that sub is contained within s[start,end]. Optional\n\
1886arguments start and end are interpreted as in slice notation.\n\
1887\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001888Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889
1890static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001891string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001893 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894 if (result == -2)
1895 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001896 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897}
1898
1899
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001900PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901"S.index(sub [,start [,end]]) -> int\n\
1902\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001903Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904
1905static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001906string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001908 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909 if (result == -2)
1910 return NULL;
1911 if (result == -1) {
1912 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001913 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001914 return NULL;
1915 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001916 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917}
1918
1919
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001920PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001921"S.rfind(sub [,start [,end]]) -> int\n\
1922\n\
1923Return the highest index in S where substring sub is found,\n\
1924such that sub is contained within s[start,end]. Optional\n\
1925arguments start and end are interpreted as in slice notation.\n\
1926\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001927Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928
1929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001930string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001931{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001932 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933 if (result == -2)
1934 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001935 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001936}
1937
1938
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001939PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940"S.rindex(sub [,start [,end]]) -> int\n\
1941\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001942Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001943
1944static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001945string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001946{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001947 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001948 if (result == -2)
1949 return NULL;
1950 if (result == -1) {
1951 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001952 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001953 return NULL;
1954 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001955 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956}
1957
1958
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001959Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001960do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1961{
1962 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001963 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001964 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001965 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1966 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001967
1968 i = 0;
1969 if (striptype != RIGHTSTRIP) {
1970 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1971 i++;
1972 }
1973 }
1974
1975 j = len;
1976 if (striptype != LEFTSTRIP) {
1977 do {
1978 j--;
1979 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1980 j++;
1981 }
1982
1983 if (i == 0 && j == len && PyString_CheckExact(self)) {
1984 Py_INCREF(self);
1985 return (PyObject*)self;
1986 }
1987 else
1988 return PyString_FromStringAndSize(s+i, j-i);
1989}
1990
1991
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001992Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001993do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994{
1995 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001996 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998 i = 0;
1999 if (striptype != RIGHTSTRIP) {
2000 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2001 i++;
2002 }
2003 }
2004
2005 j = len;
2006 if (striptype != LEFTSTRIP) {
2007 do {
2008 j--;
2009 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2010 j++;
2011 }
2012
Tim Peters8fa5dd02001-09-12 02:18:30 +00002013 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014 Py_INCREF(self);
2015 return (PyObject*)self;
2016 }
2017 else
2018 return PyString_FromStringAndSize(s+i, j-i);
2019}
2020
2021
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002022Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002023do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2024{
2025 PyObject *sep = NULL;
2026
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002027 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002028 return NULL;
2029
2030 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002031 if (PyString_Check(sep))
2032 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002033#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002034 else if (PyUnicode_Check(sep)) {
2035 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2036 PyObject *res;
2037 if (uniself==NULL)
2038 return NULL;
2039 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2040 striptype, sep);
2041 Py_DECREF(uniself);
2042 return res;
2043 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002044#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002045 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002046#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002047 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002048#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002049 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002050#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002051 STRIPNAME(striptype));
2052 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002053 }
2054
2055 return do_strip(self, striptype);
2056}
2057
2058
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002059PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002060"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002061\n\
2062Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002063whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002064If chars is given and not None, remove characters in chars instead.\n\
2065If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066
2067static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002068string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002069{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002070 if (PyTuple_GET_SIZE(args) == 0)
2071 return do_strip(self, BOTHSTRIP); /* Common case */
2072 else
2073 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002074}
2075
2076
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002077PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002078"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002080Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002081If chars is given and not None, remove characters in chars instead.\n\
2082If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002083
2084static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002085string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002087 if (PyTuple_GET_SIZE(args) == 0)
2088 return do_strip(self, LEFTSTRIP); /* Common case */
2089 else
2090 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091}
2092
2093
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002094PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002095"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002096\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002097Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002098If chars is given and not None, remove characters in chars instead.\n\
2099If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100
2101static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002102string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002103{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002104 if (PyTuple_GET_SIZE(args) == 0)
2105 return do_strip(self, RIGHTSTRIP); /* Common case */
2106 else
2107 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108}
2109
2110
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002111PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112"S.lower() -> string\n\
2113\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002114Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002115
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002116/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2117#ifndef _tolower
2118#define _tolower tolower
2119#endif
2120
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002121static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002122string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002124 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002125 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002126 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002128 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002129 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002131
2132 s = PyString_AS_STRING(newobj);
2133
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002134 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002135
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002137 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002138 if (isupper(c))
2139 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002140 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002141
Anthony Baxtera6286212006-04-11 07:42:36 +00002142 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143}
2144
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002145PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146"S.upper() -> string\n\
2147\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002148Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002150#ifndef _toupper
2151#define _toupper toupper
2152#endif
2153
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002155string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002157 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002158 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002159 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002161 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002162 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002163 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002164
2165 s = PyString_AS_STRING(newobj);
2166
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002167 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002168
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002170 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002171 if (islower(c))
2172 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002174
Anthony Baxtera6286212006-04-11 07:42:36 +00002175 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176}
2177
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002178PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002179"S.title() -> string\n\
2180\n\
2181Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002182characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002183
2184static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002185string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186{
2187 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002188 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002189 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002190 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002191
Anthony Baxtera6286212006-04-11 07:42:36 +00002192 newobj = PyString_FromStringAndSize(NULL, n);
2193 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002195 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196 for (i = 0; i < n; i++) {
2197 int c = Py_CHARMASK(*s++);
2198 if (islower(c)) {
2199 if (!previous_is_cased)
2200 c = toupper(c);
2201 previous_is_cased = 1;
2202 } else if (isupper(c)) {
2203 if (previous_is_cased)
2204 c = tolower(c);
2205 previous_is_cased = 1;
2206 } else
2207 previous_is_cased = 0;
2208 *s_new++ = c;
2209 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002210 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002211}
2212
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002213PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214"S.capitalize() -> string\n\
2215\n\
2216Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002217capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218
2219static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002220string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221{
2222 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002223 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002224 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225
Anthony Baxtera6286212006-04-11 07:42:36 +00002226 newobj = PyString_FromStringAndSize(NULL, n);
2227 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002228 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002229 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 if (0 < n) {
2231 int c = Py_CHARMASK(*s++);
2232 if (islower(c))
2233 *s_new = toupper(c);
2234 else
2235 *s_new = c;
2236 s_new++;
2237 }
2238 for (i = 1; i < n; i++) {
2239 int c = Py_CHARMASK(*s++);
2240 if (isupper(c))
2241 *s_new = tolower(c);
2242 else
2243 *s_new = c;
2244 s_new++;
2245 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002246 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247}
2248
2249
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002250PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002251"S.count(sub[, start[, end]]) -> int\n\
2252\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002253Return the number of non-overlapping occurrences of substring sub in\n\
2254string S[start:end]. Optional arguments start and end are interpreted\n\
2255as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256
2257static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002258string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002259{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002260 PyObject *sub_obj;
2261 const char *str = PyString_AS_STRING(self), *sub;
2262 Py_ssize_t sub_len;
2263 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002264
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002265 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2266 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002268
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002269 if (PyString_Check(sub_obj)) {
2270 sub = PyString_AS_STRING(sub_obj);
2271 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002273#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002274 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002275 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002276 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002277 if (count == -1)
2278 return NULL;
2279 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002280 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002281 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002282#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002283 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284 return NULL;
2285
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002286 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002287
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002288 return PyInt_FromSsize_t(
2289 stringlib_count(str + start, end - start, sub, sub_len)
2290 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291}
2292
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002293PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294"S.swapcase() -> string\n\
2295\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002297converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298
2299static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002300string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301{
2302 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002303 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002304 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305
Anthony Baxtera6286212006-04-11 07:42:36 +00002306 newobj = PyString_FromStringAndSize(NULL, n);
2307 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002308 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002309 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310 for (i = 0; i < n; i++) {
2311 int c = Py_CHARMASK(*s++);
2312 if (islower(c)) {
2313 *s_new = toupper(c);
2314 }
2315 else if (isupper(c)) {
2316 *s_new = tolower(c);
2317 }
2318 else
2319 *s_new = c;
2320 s_new++;
2321 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002322 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002323}
2324
2325
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002326PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002327"S.translate(table [,deletechars]) -> string\n\
2328\n\
2329Return a copy of the string S, where all characters occurring\n\
2330in the optional argument deletechars are removed, and the\n\
2331remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002332translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333
2334static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002335string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002336{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337 register char *input, *output;
2338 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002339 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002342 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343 PyObject *result;
2344 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002345 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002347 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002348 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002350
2351 if (PyString_Check(tableobj)) {
2352 table1 = PyString_AS_STRING(tableobj);
2353 tablen = PyString_GET_SIZE(tableobj);
2354 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002355#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002357 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002358 parameter; instead a mapping to None will cause characters
2359 to be deleted. */
2360 if (delobj != NULL) {
2361 PyErr_SetString(PyExc_TypeError,
2362 "deletions are implemented differently for unicode");
2363 return NULL;
2364 }
2365 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2366 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002367#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002370
Martin v. Löwis00b61272002-12-12 20:03:19 +00002371 if (tablen != 256) {
2372 PyErr_SetString(PyExc_ValueError,
2373 "translation table must be 256 characters long");
2374 return NULL;
2375 }
2376
Guido van Rossum4c08d552000-03-10 22:55:18 +00002377 if (delobj != NULL) {
2378 if (PyString_Check(delobj)) {
2379 del_table = PyString_AS_STRING(delobj);
2380 dellen = PyString_GET_SIZE(delobj);
2381 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002382#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383 else if (PyUnicode_Check(delobj)) {
2384 PyErr_SetString(PyExc_TypeError,
2385 "deletions are implemented differently for unicode");
2386 return NULL;
2387 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002388#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002389 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2390 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 }
2392 else {
2393 del_table = NULL;
2394 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002395 }
2396
2397 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002398 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002399 result = PyString_FromStringAndSize((char *)NULL, inlen);
2400 if (result == NULL)
2401 return NULL;
2402 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002403 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404
2405 if (dellen == 0) {
2406 /* If no deletions are required, use faster code */
2407 for (i = inlen; --i >= 0; ) {
2408 c = Py_CHARMASK(*input++);
2409 if (Py_CHARMASK((*output++ = table[c])) != c)
2410 changed = 1;
2411 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002412 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413 return result;
2414 Py_DECREF(result);
2415 Py_INCREF(input_obj);
2416 return input_obj;
2417 }
2418
2419 for (i = 0; i < 256; i++)
2420 trans_table[i] = Py_CHARMASK(table[i]);
2421
2422 for (i = 0; i < dellen; i++)
2423 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2424
2425 for (i = inlen; --i >= 0; ) {
2426 c = Py_CHARMASK(*input++);
2427 if (trans_table[c] != -1)
2428 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2429 continue;
2430 changed = 1;
2431 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002432 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002433 Py_DECREF(result);
2434 Py_INCREF(input_obj);
2435 return input_obj;
2436 }
2437 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002438 if (inlen > 0)
2439 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002440 return result;
2441}
2442
2443
/* Search directions understood by findstring() and countstring():
   callers pass one of these and the helpers test the sign. */
#define FORWARD (1)
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to `c` within the first `target_len` bytes
   of `target`; yields a char* to it, or NULL when there is none. */
#define findchar(target, target_len, c) \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2451
2452/* String ops must return a string. */
2453/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002454Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002455return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002456{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002457 if (PyString_CheckExact(self)) {
2458 Py_INCREF(self);
2459 return self;
2460 }
2461 return (PyStringObject *)PyString_FromStringAndSize(
2462 PyString_AS_STRING(self),
2463 PyString_GET_SIZE(self));
2464}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002465
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002466Py_LOCAL_INLINE(Py_ssize_t)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002467countchar(char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002468{
2469 Py_ssize_t count=0;
2470 char *start=target;
2471 char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002473 while ( (start=findchar(start, end-start, c)) != NULL ) {
2474 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002475 if (count >= maxcount)
2476 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002477 start += 1;
2478 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002479 return count;
2480}
2481
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002482Py_LOCAL(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002483findstring(char *target, Py_ssize_t target_len,
2484 char *pattern, Py_ssize_t pattern_len,
2485 Py_ssize_t start,
2486 Py_ssize_t end,
2487 int direction)
2488{
2489 if (start < 0) {
2490 start += target_len;
2491 if (start < 0)
2492 start = 0;
2493 }
2494 if (end > target_len) {
2495 end = target_len;
2496 } else if (end < 0) {
2497 end += target_len;
2498 if (end < 0)
2499 end = 0;
2500 }
2501
2502 /* zero-length substrings always match at the first attempt */
2503 if (pattern_len == 0)
2504 return (direction > 0) ? start : end;
2505
2506 end -= pattern_len;
2507
2508 if (direction < 0) {
2509 for (; end >= start; end--)
2510 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2511 return end;
2512 } else {
2513 for (; start <= end; start++)
2514 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2515 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002516 }
2517 return -1;
2518}
2519
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002520Py_LOCAL_INLINE(Py_ssize_t)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002521countstring(char *target, Py_ssize_t target_len,
2522 char *pattern, Py_ssize_t pattern_len,
2523 Py_ssize_t start,
2524 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002525 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002527 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002528
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002529 if (start < 0) {
2530 start += target_len;
2531 if (start < 0)
2532 start = 0;
2533 }
2534 if (end > target_len) {
2535 end = target_len;
2536 } else if (end < 0) {
2537 end += target_len;
2538 if (end < 0)
2539 end = 0;
2540 }
2541
2542 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002543 if (pattern_len == 0 || maxcount == 0) {
2544 if (target_len+1 < maxcount)
2545 return target_len+1;
2546 return maxcount;
2547 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002548
2549 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002550 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002551 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002552 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2553 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002554 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002555 end -= pattern_len-1;
2556 }
2557 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002558 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002559 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2560 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002561 if (--maxcount <= 0)
2562 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002563 start += pattern_len-1;
2564 }
2565 }
2566 return count;
2567}
2568
2569
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002570/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002571
2572/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002573Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002574replace_interleave(PyStringObject *self,
2575 PyStringObject *to,
2576 Py_ssize_t maxcount)
2577{
2578 char *self_s, *to_s, *result_s;
2579 Py_ssize_t self_len, to_len, result_len;
2580 Py_ssize_t count, i, product;
2581 PyStringObject *result;
2582
2583 self_len = PyString_GET_SIZE(self);
2584 to_len = PyString_GET_SIZE(to);
2585
2586 /* 1 at the end plus 1 after every character */
2587 count = self_len+1;
2588 if (maxcount < count)
2589 count = maxcount;
2590
2591 /* Check for overflow */
2592 /* result_len = count * to_len + self_len; */
2593 product = count * to_len;
2594 if (product / to_len != count) {
2595 PyErr_SetString(PyExc_OverflowError,
2596 "replace string is too long");
2597 return NULL;
2598 }
2599 result_len = product + self_len;
2600 if (result_len < 0) {
2601 PyErr_SetString(PyExc_OverflowError,
2602 "replace string is too long");
2603 return NULL;
2604 }
2605
2606 if (! (result = (PyStringObject *)
2607 PyString_FromStringAndSize(NULL, result_len)) )
2608 return NULL;
2609
2610 self_s = PyString_AS_STRING(self);
2611 to_s = PyString_AS_STRING(to);
2612 to_len = PyString_GET_SIZE(to);
2613 result_s = PyString_AS_STRING(result);
2614
2615 /* TODO: special case single character, which doesn't need memcpy */
2616
2617 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002618 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002619 result_s += to_len;
2620 count -= 1;
2621
2622 for (i=0; i<count; i++) {
2623 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002624 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002625 result_s += to_len;
2626 }
2627
2628 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002629 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002630
2631 return result;
2632}
2633
2634/* Special case for deleting a single character */
2635/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002636Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002637replace_delete_single_character(PyStringObject *self,
2638 char from_c, Py_ssize_t maxcount)
2639{
2640 char *self_s, *result_s;
2641 char *start, *next, *end;
2642 Py_ssize_t self_len, result_len;
2643 Py_ssize_t count;
2644 PyStringObject *result;
2645
2646 self_len = PyString_GET_SIZE(self);
2647 self_s = PyString_AS_STRING(self);
2648
Andrew Dalke51324072006-05-26 20:25:22 +00002649 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002650 if (count == 0) {
2651 return return_self(self);
2652 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002653
2654 result_len = self_len - count; /* from_len == 1 */
2655 assert(result_len>=0);
2656
2657 if ( (result = (PyStringObject *)
2658 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2659 return NULL;
2660 result_s = PyString_AS_STRING(result);
2661
2662 start = self_s;
2663 end = self_s + self_len;
2664 while (count-- > 0) {
2665 next = findchar(start, end-start, from_c);
2666 if (next == NULL)
2667 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002668 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002669 result_s += (next-start);
2670 start = next+1;
2671 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002672 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002673
2674 return result;
2675}
2676
2677/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2678
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002679Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002680replace_delete_substring(PyStringObject *self, PyStringObject *from,
2681 Py_ssize_t maxcount) {
2682 char *self_s, *from_s, *result_s;
2683 char *start, *next, *end;
2684 Py_ssize_t self_len, from_len, result_len;
2685 Py_ssize_t count, offset;
2686 PyStringObject *result;
2687
2688 self_len = PyString_GET_SIZE(self);
2689 self_s = PyString_AS_STRING(self);
2690 from_len = PyString_GET_SIZE(from);
2691 from_s = PyString_AS_STRING(from);
2692
2693 count = countstring(self_s, self_len,
2694 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002695 0, self_len, 1,
2696 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002697
2698 if (count == 0) {
2699 /* no matches */
2700 return return_self(self);
2701 }
2702
2703 result_len = self_len - (count * from_len);
2704 assert (result_len>=0);
2705
2706 if ( (result = (PyStringObject *)
2707 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2708 return NULL;
2709
2710 result_s = PyString_AS_STRING(result);
2711
2712 start = self_s;
2713 end = self_s + self_len;
2714 while (count-- > 0) {
2715 offset = findstring(start, end-start,
2716 from_s, from_len,
2717 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002718 if (offset == -1)
2719 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002720 next = start + offset;
2721
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002722 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002723
2724 result_s += (next-start);
2725 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002726 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002727 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002728 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002729}
2730
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002731/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002732Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002733replace_single_character_in_place(PyStringObject *self,
2734 char from_c, char to_c,
2735 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002736{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002737 char *self_s, *result_s, *start, *end, *next;
2738 Py_ssize_t self_len;
2739 PyStringObject *result;
2740
2741 /* The result string will be the same size */
2742 self_s = PyString_AS_STRING(self);
2743 self_len = PyString_GET_SIZE(self);
2744
2745 next = findchar(self_s, self_len, from_c);
2746
2747 if (next == NULL) {
2748 /* No matches; return the original string */
2749 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002750 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002751
2752 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002753 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 if (result == NULL)
2755 return NULL;
2756 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002757 Py_MEMCPY(result_s, self_s, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002758
2759 /* change everything in-place, starting with this one */
2760 start = result_s + (next-self_s);
2761 *start = to_c;
2762 start++;
2763 end = result_s + self_len;
2764
2765 while (--maxcount > 0) {
2766 next = findchar(start, end-start, from_c);
2767 if (next == NULL)
2768 break;
2769 *next = to_c;
2770 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002771 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002772
2773 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002774}
2775
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002776/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002777Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002778replace_substring_in_place(PyStringObject *self,
2779 PyStringObject *from,
2780 PyStringObject *to,
2781 Py_ssize_t maxcount)
2782{
2783 char *result_s, *start, *end;
2784 char *self_s, *from_s, *to_s;
2785 Py_ssize_t self_len, from_len, offset;
2786 PyStringObject *result;
2787
2788 /* The result string will be the same size */
2789
2790 self_s = PyString_AS_STRING(self);
2791 self_len = PyString_GET_SIZE(self);
2792
2793 from_s = PyString_AS_STRING(from);
2794 from_len = PyString_GET_SIZE(from);
2795 to_s = PyString_AS_STRING(to);
2796
2797 offset = findstring(self_s, self_len,
2798 from_s, from_len,
2799 0, self_len, FORWARD);
2800
2801 if (offset == -1) {
2802 /* No matches; return the original string */
2803 return return_self(self);
2804 }
2805
2806 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002807 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808 if (result == NULL)
2809 return NULL;
2810 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002811 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002812
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002813
2814 /* change everything in-place, starting with this one */
2815 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002816 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002817 start += from_len;
2818 end = result_s + self_len;
2819
2820 while ( --maxcount > 0) {
2821 offset = findstring(start, end-start,
2822 from_s, from_len,
2823 0, end-start, FORWARD);
2824 if (offset==-1)
2825 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002826 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 start += offset+from_len;
2828 }
2829
2830 return result;
2831}
2832
2833/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002834Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835replace_single_character(PyStringObject *self,
2836 char from_c,
2837 PyStringObject *to,
2838 Py_ssize_t maxcount)
2839{
2840 char *self_s, *to_s, *result_s;
2841 char *start, *next, *end;
2842 Py_ssize_t self_len, to_len, result_len;
2843 Py_ssize_t count, product;
2844 PyStringObject *result;
2845
2846 self_s = PyString_AS_STRING(self);
2847 self_len = PyString_GET_SIZE(self);
2848
Andrew Dalke51324072006-05-26 20:25:22 +00002849 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002850
2851 if (count == 0) {
2852 /* no matches, return unchanged */
2853 return return_self(self);
2854 }
2855
2856 to_s = PyString_AS_STRING(to);
2857 to_len = PyString_GET_SIZE(to);
2858
2859 /* use the difference between current and new, hence the "-1" */
2860 /* result_len = self_len + count * (to_len-1) */
2861 product = count * (to_len-1);
2862 if (product / (to_len-1) != count) {
2863 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2864 return NULL;
2865 }
2866 result_len = self_len + product;
2867 if (result_len < 0) {
2868 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2869 return NULL;
2870 }
2871
2872 if ( (result = (PyStringObject *)
2873 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2874 return NULL;
2875 result_s = PyString_AS_STRING(result);
2876
2877 start = self_s;
2878 end = self_s + self_len;
2879 while (count-- > 0) {
2880 next = findchar(start, end-start, from_c);
2881 if (next == NULL)
2882 break;
2883
2884 if (next == start) {
2885 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002886 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002887 result_s += to_len;
2888 start += 1;
2889 } else {
2890 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002891 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002892 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002893 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002894 result_s += to_len;
2895 start = next+1;
2896 }
2897 }
2898 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002899 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002900
2901 return result;
2902}
2903
2904/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002905Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906replace_substring(PyStringObject *self,
2907 PyStringObject *from,
2908 PyStringObject *to,
2909 Py_ssize_t maxcount) {
2910 char *self_s, *from_s, *to_s, *result_s;
2911 char *start, *next, *end;
2912 Py_ssize_t self_len, from_len, to_len, result_len;
2913 Py_ssize_t count, offset, product;
2914 PyStringObject *result;
2915
2916 self_s = PyString_AS_STRING(self);
2917 self_len = PyString_GET_SIZE(self);
2918 from_s = PyString_AS_STRING(from);
2919 from_len = PyString_GET_SIZE(from);
2920
2921 count = countstring(self_s, self_len,
2922 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002923 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002924 if (count == 0) {
2925 /* no matches, return unchanged */
2926 return return_self(self);
2927 }
2928
2929 to_s = PyString_AS_STRING(to);
2930 to_len = PyString_GET_SIZE(to);
2931
2932 /* Check for overflow */
2933 /* result_len = self_len + count * (to_len-from_len) */
2934 product = count * (to_len-from_len);
2935 if (product / (to_len-from_len) != count) {
2936 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2937 return NULL;
2938 }
2939 result_len = self_len + product;
2940 if (result_len < 0) {
2941 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942 return NULL;
2943 }
2944
2945 if ( (result = (PyStringObject *)
2946 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2947 return NULL;
2948 result_s = PyString_AS_STRING(result);
2949
2950 start = self_s;
2951 end = self_s + self_len;
2952 while (count-- > 0) {
2953 offset = findstring(start, end-start,
2954 from_s, from_len,
2955 0, end-start, FORWARD);
2956 if (offset == -1)
2957 break;
2958 next = start+offset;
2959 if (next == start) {
2960 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002961 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002962 result_s += to_len;
2963 start += from_len;
2964 } else {
2965 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002966 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002967 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002968 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002969 result_s += to_len;
2970 start = next+from_len;
2971 }
2972 }
2973 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002974 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002975
2976 return result;
2977}
2978
2979
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002980Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002981replace(PyStringObject *self,
2982 PyStringObject *from,
2983 PyStringObject *to,
2984 Py_ssize_t maxcount)
2985{
2986 Py_ssize_t from_len, to_len;
2987
2988 if (maxcount < 0) {
2989 maxcount = PY_SSIZE_T_MAX;
2990 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2991 /* nothing to do; return the original string */
2992 return return_self(self);
2993 }
2994
2995 from_len = PyString_GET_SIZE(from);
2996 to_len = PyString_GET_SIZE(to);
2997
2998 if (maxcount == 0 ||
2999 (from_len == 0 && to_len == 0)) {
3000 /* nothing to do; return the original string */
3001 return return_self(self);
3002 }
3003
3004 /* Handle zero-length special cases */
3005
3006 if (from_len == 0) {
3007 /* insert the 'to' string everywhere. */
3008 /* >>> "Python".replace("", ".") */
3009 /* '.P.y.t.h.o.n.' */
3010 return replace_interleave(self, to, maxcount);
3011 }
3012
3013 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3014 /* point for an empty self string to generate a non-empty string */
3015 /* Special case so the remaining code always gets a non-empty string */
3016 if (PyString_GET_SIZE(self) == 0) {
3017 return return_self(self);
3018 }
3019
3020 if (to_len == 0) {
3021 /* delete all occurances of 'from' string */
3022 if (from_len == 1) {
3023 return replace_delete_single_character(
3024 self, PyString_AS_STRING(from)[0], maxcount);
3025 } else {
3026 return replace_delete_substring(self, from, maxcount);
3027 }
3028 }
3029
3030 /* Handle special case where both strings have the same length */
3031
3032 if (from_len == to_len) {
3033 if (from_len == 1) {
3034 return replace_single_character_in_place(
3035 self,
3036 PyString_AS_STRING(from)[0],
3037 PyString_AS_STRING(to)[0],
3038 maxcount);
3039 } else {
3040 return replace_substring_in_place(
3041 self, from, to, maxcount);
3042 }
3043 }
3044
3045 /* Otherwise use the more generic algorithms */
3046 if (from_len == 1) {
3047 return replace_single_character(self, PyString_AS_STRING(from)[0],
3048 to, maxcount);
3049 } else {
3050 /* len('from')>=2, len('to')>=1 */
3051 return replace_substring(self, from, to, maxcount);
3052 }
3053}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003054
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003055PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003056"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003057\n\
3058Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003059old replaced by new. If the optional argument count is\n\
3060given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003061
3062static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003063string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003064{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003065 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003066 PyObject *from, *to;
Jack Diederich60cbb3f2006-05-25 18:47:15 +00003067 const char *tmp_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003068 Py_ssize_t tmp_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003069
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003070 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003071 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003072
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003073 if (PyString_Check(from)) {
3074 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003076#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003078 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003079 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003080#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003081 else if (PyObject_AsCharBuffer(from, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082 return NULL;
3083
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003084 if (PyString_Check(to)) {
3085 /* Can this be made a '!check' after the Unicode check? */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003086 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003087#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003088 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003089 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003090 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003091#endif
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003092 else if (PyObject_AsCharBuffer(to, &tmp_s, &tmp_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003093 return NULL;
3094
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003095 return (PyObject *)replace((PyStringObject *) self,
3096 (PyStringObject *) from,
3097 (PyStringObject *) to, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003098}
3099
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003100/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003101
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003102/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003103 * against substr, using the start and end arguments. Returns
3104 * -1 on error, 0 if not found and 1 if found.
3105 */
3106Py_LOCAL(int)
3107_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3108 Py_ssize_t end, int direction)
3109{
3110 Py_ssize_t len = PyString_GET_SIZE(self);
3111 Py_ssize_t slen;
3112 const char* sub;
3113 const char* str;
3114
3115 if (PyString_Check(substr)) {
3116 sub = PyString_AS_STRING(substr);
3117 slen = PyString_GET_SIZE(substr);
3118 }
3119#ifdef Py_USING_UNICODE
3120 else if (PyUnicode_Check(substr))
3121 return PyUnicode_Tailmatch((PyObject *)self,
3122 substr, start, end, direction);
3123#endif
3124 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3125 return -1;
3126 str = PyString_AS_STRING(self);
3127
3128 string_adjust_indices(&start, &end, len);
3129
3130 if (direction < 0) {
3131 /* startswith */
3132 if (start+slen > len)
3133 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003134 } else {
3135 /* endswith */
3136 if (end-start < slen || start > len)
3137 return 0;
3138
3139 if (end-slen > start)
3140 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003141 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003142 if (end-start >= slen)
3143 return ! memcmp(str+start, sub, slen);
3144 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003145}
3146
3147
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003148PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003149"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003150\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003151Return True if S starts with the specified prefix, False otherwise.\n\
3152With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003153With optional end, stop comparing S at that position.\n\
3154prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003155
3156static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003157string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003158{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003159 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003160 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003161 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003162 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003163
Guido van Rossumc6821402000-05-08 14:08:05 +00003164 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3165 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003166 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003167 if (PyTuple_Check(subobj)) {
3168 Py_ssize_t i;
3169 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3170 result = _string_tailmatch(self,
3171 PyTuple_GET_ITEM(subobj, i),
3172 start, end, -1);
3173 if (result == -1)
3174 return NULL;
3175 else if (result) {
3176 Py_RETURN_TRUE;
3177 }
3178 }
3179 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003180 }
Georg Brandl24250812006-06-09 18:45:48 +00003181 result = _string_tailmatch(self, subobj, start, end, -1);
3182 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003183 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003184 else
Georg Brandl24250812006-06-09 18:45:48 +00003185 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003186}
3187
3188
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003189PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003190"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003191\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003192Return True if S ends with the specified suffix, False otherwise.\n\
3193With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003194With optional end, stop comparing S at that position.\n\
3195suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003196
3197static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003198string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003199{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003200 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003201 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003202 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003203 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003204
Guido van Rossumc6821402000-05-08 14:08:05 +00003205 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3206 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003207 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003208 if (PyTuple_Check(subobj)) {
3209 Py_ssize_t i;
3210 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3211 result = _string_tailmatch(self,
3212 PyTuple_GET_ITEM(subobj, i),
3213 start, end, +1);
3214 if (result == -1)
3215 return NULL;
3216 else if (result) {
3217 Py_RETURN_TRUE;
3218 }
3219 }
3220 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003221 }
Georg Brandl24250812006-06-09 18:45:48 +00003222 result = _string_tailmatch(self, subobj, start, end, +1);
3223 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003224 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003225 else
Georg Brandl24250812006-06-09 18:45:48 +00003226 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003227}
3228
3229
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003230PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003231"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003232\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003233Encodes S using the codec registered for encoding. encoding defaults\n\
3234to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003235handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003236a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3237'xmlcharrefreplace' as well as any other name registered with\n\
3238codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003239
3240static PyObject *
3241string_encode(PyStringObject *self, PyObject *args)
3242{
3243 char *encoding = NULL;
3244 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003245 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003246
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003247 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3248 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003249 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003250 if (v == NULL)
3251 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003252 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3253 PyErr_Format(PyExc_TypeError,
3254 "encoder did not return a string/unicode object "
3255 "(type=%.400s)",
3256 v->ob_type->tp_name);
3257 Py_DECREF(v);
3258 return NULL;
3259 }
3260 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003261
3262 onError:
3263 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003264}
3265
3266
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003267PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003268"S.decode([encoding[,errors]]) -> object\n\
3269\n\
3270Decodes S using the codec registered for encoding. encoding defaults\n\
3271to the default encoding. errors may be given to set a different error\n\
3272handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003273a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3274as well as any other name registerd with codecs.register_error that is\n\
3275able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003276
3277static PyObject *
3278string_decode(PyStringObject *self, PyObject *args)
3279{
3280 char *encoding = NULL;
3281 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003282 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003283
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003284 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3285 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003286 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003287 if (v == NULL)
3288 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003289 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3290 PyErr_Format(PyExc_TypeError,
3291 "decoder did not return a string/unicode object "
3292 "(type=%.400s)",
3293 v->ob_type->tp_name);
3294 Py_DECREF(v);
3295 return NULL;
3296 }
3297 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003298
3299 onError:
3300 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003301}
3302
3303
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003304PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003305"S.expandtabs([tabsize]) -> string\n\
3306\n\
3307Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003308If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003309
3310static PyObject*
3311string_expandtabs(PyStringObject *self, PyObject *args)
3312{
3313 const char *e, *p;
3314 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003315 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003316 PyObject *u;
3317 int tabsize = 8;
3318
3319 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3320 return NULL;
3321
Thomas Wouters7e474022000-07-16 12:04:32 +00003322 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003323 i = j = 0;
3324 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3325 for (p = PyString_AS_STRING(self); p < e; p++)
3326 if (*p == '\t') {
3327 if (tabsize > 0)
3328 j += tabsize - (j % tabsize);
3329 }
3330 else {
3331 j++;
3332 if (*p == '\n' || *p == '\r') {
3333 i += j;
3334 j = 0;
3335 }
3336 }
3337
3338 /* Second pass: create output string and fill it */
3339 u = PyString_FromStringAndSize(NULL, i + j);
3340 if (!u)
3341 return NULL;
3342
3343 j = 0;
3344 q = PyString_AS_STRING(u);
3345
3346 for (p = PyString_AS_STRING(self); p < e; p++)
3347 if (*p == '\t') {
3348 if (tabsize > 0) {
3349 i = tabsize - (j % tabsize);
3350 j += i;
3351 while (i--)
3352 *q++ = ' ';
3353 }
3354 }
3355 else {
3356 j++;
3357 *q++ = *p;
3358 if (*p == '\n' || *p == '\r')
3359 j = 0;
3360 }
3361
3362 return u;
3363}
3364
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003365Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003366pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003367{
3368 PyObject *u;
3369
3370 if (left < 0)
3371 left = 0;
3372 if (right < 0)
3373 right = 0;
3374
Tim Peters8fa5dd02001-09-12 02:18:30 +00003375 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003376 Py_INCREF(self);
3377 return (PyObject *)self;
3378 }
3379
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003380 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003381 left + PyString_GET_SIZE(self) + right);
3382 if (u) {
3383 if (left)
3384 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003385 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003386 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387 PyString_GET_SIZE(self));
3388 if (right)
3389 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3390 fill, right);
3391 }
3392
3393 return u;
3394}
3395
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003396PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003397"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003398"\n"
3399"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003400"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003401
3402static PyObject *
3403string_ljust(PyStringObject *self, PyObject *args)
3404{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003405 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003406 char fillchar = ' ';
3407
Thomas Wouters4abb3662006-04-19 14:50:15 +00003408 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003409 return NULL;
3410
Tim Peters8fa5dd02001-09-12 02:18:30 +00003411 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003412 Py_INCREF(self);
3413 return (PyObject*) self;
3414 }
3415
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003416 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003417}
3418
3419
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003420PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003421"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003422"\n"
3423"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003424"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003425
3426static PyObject *
3427string_rjust(PyStringObject *self, PyObject *args)
3428{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003429 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003430 char fillchar = ' ';
3431
Thomas Wouters4abb3662006-04-19 14:50:15 +00003432 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003433 return NULL;
3434
Tim Peters8fa5dd02001-09-12 02:18:30 +00003435 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003436 Py_INCREF(self);
3437 return (PyObject*) self;
3438 }
3439
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003440 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003441}
3442
3443
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003444PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003445"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003446"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003447"Return S centered in a string of length width. Padding is\n"
3448"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003449
3450static PyObject *
3451string_center(PyStringObject *self, PyObject *args)
3452{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003453 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003454 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003455 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003456
Thomas Wouters4abb3662006-04-19 14:50:15 +00003457 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003458 return NULL;
3459
Tim Peters8fa5dd02001-09-12 02:18:30 +00003460 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003461 Py_INCREF(self);
3462 return (PyObject*) self;
3463 }
3464
3465 marg = width - PyString_GET_SIZE(self);
3466 left = marg / 2 + (marg & width & 1);
3467
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003468 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003469}
3470
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003471PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003472"S.zfill(width) -> string\n"
3473"\n"
3474"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003475"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003476
3477static PyObject *
3478string_zfill(PyStringObject *self, PyObject *args)
3479{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003480 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003481 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003482 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003483 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003484
Thomas Wouters4abb3662006-04-19 14:50:15 +00003485 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003486 return NULL;
3487
3488 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003489 if (PyString_CheckExact(self)) {
3490 Py_INCREF(self);
3491 return (PyObject*) self;
3492 }
3493 else
3494 return PyString_FromStringAndSize(
3495 PyString_AS_STRING(self),
3496 PyString_GET_SIZE(self)
3497 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003498 }
3499
3500 fill = width - PyString_GET_SIZE(self);
3501
3502 s = pad(self, fill, 0, '0');
3503
3504 if (s == NULL)
3505 return NULL;
3506
3507 p = PyString_AS_STRING(s);
3508 if (p[fill] == '+' || p[fill] == '-') {
3509 /* move sign to beginning of string */
3510 p[0] = p[fill];
3511 p[fill] = '0';
3512 }
3513
3514 return (PyObject*) s;
3515}
3516
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003517PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003518"S.isspace() -> bool\n\
3519\n\
3520Return True if all characters in S are whitespace\n\
3521and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003522
3523static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003524string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003525{
Fred Drakeba096332000-07-09 07:04:36 +00003526 register const unsigned char *p
3527 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003528 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003529
Guido van Rossum4c08d552000-03-10 22:55:18 +00003530 /* Shortcut for single character strings */
3531 if (PyString_GET_SIZE(self) == 1 &&
3532 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003533 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003535 /* Special case for empty strings */
3536 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003537 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003538
Guido van Rossum4c08d552000-03-10 22:55:18 +00003539 e = p + PyString_GET_SIZE(self);
3540 for (; p < e; p++) {
3541 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003542 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003543 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003544 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003545}
3546
3547
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003548PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003549"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003550\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003551Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003552and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003553
3554static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003555string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003556{
Fred Drakeba096332000-07-09 07:04:36 +00003557 register const unsigned char *p
3558 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003559 register const unsigned char *e;
3560
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003561 /* Shortcut for single character strings */
3562 if (PyString_GET_SIZE(self) == 1 &&
3563 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003564 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003565
3566 /* Special case for empty strings */
3567 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003568 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003569
3570 e = p + PyString_GET_SIZE(self);
3571 for (; p < e; p++) {
3572 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003573 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003574 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003575 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003576}
3577
3578
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003579PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003580"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003582Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003583and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003584
3585static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003586string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003587{
Fred Drakeba096332000-07-09 07:04:36 +00003588 register const unsigned char *p
3589 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003590 register const unsigned char *e;
3591
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003592 /* Shortcut for single character strings */
3593 if (PyString_GET_SIZE(self) == 1 &&
3594 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003595 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003596
3597 /* Special case for empty strings */
3598 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003599 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003600
3601 e = p + PyString_GET_SIZE(self);
3602 for (; p < e; p++) {
3603 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003604 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003606 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003607}
3608
3609
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003610PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003611"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003613Return True if all characters in S are digits\n\
3614and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003615
3616static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003617string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003618{
Fred Drakeba096332000-07-09 07:04:36 +00003619 register const unsigned char *p
3620 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003621 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003622
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623 /* Shortcut for single character strings */
3624 if (PyString_GET_SIZE(self) == 1 &&
3625 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003626 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003627
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003628 /* Special case for empty strings */
3629 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003630 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003631
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632 e = p + PyString_GET_SIZE(self);
3633 for (; p < e; p++) {
3634 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003635 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003636 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003637 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003638}
3639
3640
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003641PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003642"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003645at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646
3647static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003648string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649{
Fred Drakeba096332000-07-09 07:04:36 +00003650 register const unsigned char *p
3651 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003652 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003653 int cased;
3654
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655 /* Shortcut for single character strings */
3656 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003657 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003658
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003659 /* Special case for empty strings */
3660 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003661 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003662
Guido van Rossum4c08d552000-03-10 22:55:18 +00003663 e = p + PyString_GET_SIZE(self);
3664 cased = 0;
3665 for (; p < e; p++) {
3666 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003667 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003668 else if (!cased && islower(*p))
3669 cased = 1;
3670 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003671 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003672}
3673
3674
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003675PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003676"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003678Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003679at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680
3681static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003682string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683{
Fred Drakeba096332000-07-09 07:04:36 +00003684 register const unsigned char *p
3685 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003686 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003687 int cased;
3688
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689 /* Shortcut for single character strings */
3690 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003691 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003692
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003693 /* Special case for empty strings */
3694 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003695 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003696
Guido van Rossum4c08d552000-03-10 22:55:18 +00003697 e = p + PyString_GET_SIZE(self);
3698 cased = 0;
3699 for (; p < e; p++) {
3700 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003701 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 else if (!cased && isupper(*p))
3703 cased = 1;
3704 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003705 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003706}
3707
3708
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003709PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003710"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003711\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003712Return True if S is a titlecased string and there is at least one\n\
3713character in S, i.e. uppercase characters may only follow uncased\n\
3714characters and lowercase characters only cased ones. Return False\n\
3715otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716
3717static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003718string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719{
Fred Drakeba096332000-07-09 07:04:36 +00003720 register const unsigned char *p
3721 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003722 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003723 int cased, previous_is_cased;
3724
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725 /* Shortcut for single character strings */
3726 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003727 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003728
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003729 /* Special case for empty strings */
3730 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003731 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003732
Guido van Rossum4c08d552000-03-10 22:55:18 +00003733 e = p + PyString_GET_SIZE(self);
3734 cased = 0;
3735 previous_is_cased = 0;
3736 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003737 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003738
3739 if (isupper(ch)) {
3740 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003741 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742 previous_is_cased = 1;
3743 cased = 1;
3744 }
3745 else if (islower(ch)) {
3746 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003747 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748 previous_is_cased = 1;
3749 cased = 1;
3750 }
3751 else
3752 previous_is_cased = 0;
3753 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003754 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003755}
3756
3757
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003758PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003759"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760\n\
3761Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003762Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003763is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003764
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765static PyObject*
3766string_splitlines(PyStringObject *self, PyObject *args)
3767{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003768 register Py_ssize_t i;
3769 register Py_ssize_t j;
3770 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003771 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003772 PyObject *list;
3773 PyObject *str;
3774 char *data;
3775
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003776 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777 return NULL;
3778
3779 data = PyString_AS_STRING(self);
3780 len = PyString_GET_SIZE(self);
3781
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003782 /* This does not use the preallocated list because splitlines is
3783 usually run with hundreds of newlines. The overhead of
3784 switching between PyList_SET_ITEM and append causes about a
3785 2-3% slowdown for that common case. A smarter implementation
3786 could move the if check out, so the SET_ITEMs are done first
3787 and the appends only done when the prealloc buffer is full.
3788 That's too much work for little gain.*/
3789
Guido van Rossum4c08d552000-03-10 22:55:18 +00003790 list = PyList_New(0);
3791 if (!list)
3792 goto onError;
3793
3794 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003795 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003796
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797 /* Find a line and append it */
3798 while (i < len && data[i] != '\n' && data[i] != '\r')
3799 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800
3801 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003802 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003803 if (i < len) {
3804 if (data[i] == '\r' && i + 1 < len &&
3805 data[i+1] == '\n')
3806 i += 2;
3807 else
3808 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003809 if (keepends)
3810 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003811 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003812 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003813 j = i;
3814 }
3815 if (j < len) {
3816 SPLIT_APPEND(data, j, len);
3817 }
3818
3819 return list;
3820
3821 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003822 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003823 return NULL;
3824}
3825
3826#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003827#undef SPLIT_ADD
3828#undef MAX_PREALLOC
3829#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003830
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003831static PyObject *
3832string_getnewargs(PyStringObject *v)
3833{
3834 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3835}
3836
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003837
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003838static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003839string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003840 /* Counterparts of the obsolete stropmodule functions; except
3841 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003842 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3843 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003844 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003845 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3846 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003847 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3848 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3849 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3850 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3851 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3852 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3853 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003854 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3855 capitalize__doc__},
3856 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3857 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3858 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003859 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003860 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3861 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3862 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3863 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3864 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3865 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3866 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003867 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3868 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003869 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3870 startswith__doc__},
3871 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3872 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3873 swapcase__doc__},
3874 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3875 translate__doc__},
3876 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3877 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3878 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3879 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3880 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3881 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3882 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3883 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3884 expandtabs__doc__},
3885 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3886 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003887 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003888 {NULL, NULL} /* sentinel */
3889};
3890
Jeremy Hylton938ace62002-07-17 16:30:39 +00003891static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003892str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3893
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003894static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003895string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003896{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003897 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003898 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003899
Guido van Rossumae960af2001-08-30 03:11:59 +00003900 if (type != &PyString_Type)
3901 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003902 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3903 return NULL;
3904 if (x == NULL)
3905 return PyString_FromString("");
3906 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003907}
3908
Guido van Rossumae960af2001-08-30 03:11:59 +00003909static PyObject *
3910str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3911{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003912 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003913 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003914
3915 assert(PyType_IsSubtype(type, &PyString_Type));
3916 tmp = string_new(&PyString_Type, args, kwds);
3917 if (tmp == NULL)
3918 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003919 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003920 n = PyString_GET_SIZE(tmp);
3921 pnew = type->tp_alloc(type, n);
3922 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003923 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003924 ((PyStringObject *)pnew)->ob_shash =
3925 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003926 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003927 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003928 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003929 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003930}
3931
Guido van Rossumcacfc072002-05-24 19:01:59 +00003932static PyObject *
3933basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3934{
3935 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003936 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003937 return NULL;
3938}
3939
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003940static PyObject *
3941string_mod(PyObject *v, PyObject *w)
3942{
3943 if (!PyString_Check(v)) {
3944 Py_INCREF(Py_NotImplemented);
3945 return Py_NotImplemented;
3946 }
3947 return PyString_Format(v, w);
3948}
3949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003950PyDoc_STRVAR(basestring_doc,
3951"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003952
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003953static PyNumberMethods string_as_number = {
3954 0, /*nb_add*/
3955 0, /*nb_subtract*/
3956 0, /*nb_multiply*/
3957 0, /*nb_divide*/
3958 string_mod, /*nb_remainder*/
3959};
3960
3961
Guido van Rossumcacfc072002-05-24 19:01:59 +00003962PyTypeObject PyBaseString_Type = {
3963 PyObject_HEAD_INIT(&PyType_Type)
3964 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003965 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003966 0,
3967 0,
3968 0, /* tp_dealloc */
3969 0, /* tp_print */
3970 0, /* tp_getattr */
3971 0, /* tp_setattr */
3972 0, /* tp_compare */
3973 0, /* tp_repr */
3974 0, /* tp_as_number */
3975 0, /* tp_as_sequence */
3976 0, /* tp_as_mapping */
3977 0, /* tp_hash */
3978 0, /* tp_call */
3979 0, /* tp_str */
3980 0, /* tp_getattro */
3981 0, /* tp_setattro */
3982 0, /* tp_as_buffer */
3983 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3984 basestring_doc, /* tp_doc */
3985 0, /* tp_traverse */
3986 0, /* tp_clear */
3987 0, /* tp_richcompare */
3988 0, /* tp_weaklistoffset */
3989 0, /* tp_iter */
3990 0, /* tp_iternext */
3991 0, /* tp_methods */
3992 0, /* tp_members */
3993 0, /* tp_getset */
3994 &PyBaseObject_Type, /* tp_base */
3995 0, /* tp_dict */
3996 0, /* tp_descr_get */
3997 0, /* tp_descr_set */
3998 0, /* tp_dictoffset */
3999 0, /* tp_init */
4000 0, /* tp_alloc */
4001 basestring_new, /* tp_new */
4002 0, /* tp_free */
4003};
4004
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004005PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004006"str(object) -> string\n\
4007\n\
4008Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004009If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004010
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004011PyTypeObject PyString_Type = {
4012 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004013 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004014 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004015 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004016 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004017 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004018 (printfunc)string_print, /* tp_print */
4019 0, /* tp_getattr */
4020 0, /* tp_setattr */
4021 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004022 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004023 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004024 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004025 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004026 (hashfunc)string_hash, /* tp_hash */
4027 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004028 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004029 PyObject_GenericGetAttr, /* tp_getattro */
4030 0, /* tp_setattro */
4031 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004032 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004033 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004034 string_doc, /* tp_doc */
4035 0, /* tp_traverse */
4036 0, /* tp_clear */
4037 (richcmpfunc)string_richcompare, /* tp_richcompare */
4038 0, /* tp_weaklistoffset */
4039 0, /* tp_iter */
4040 0, /* tp_iternext */
4041 string_methods, /* tp_methods */
4042 0, /* tp_members */
4043 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004044 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004045 0, /* tp_dict */
4046 0, /* tp_descr_get */
4047 0, /* tp_descr_set */
4048 0, /* tp_dictoffset */
4049 0, /* tp_init */
4050 0, /* tp_alloc */
4051 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004052 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004053};
4054
4055void
Fred Drakeba096332000-07-09 07:04:36 +00004056PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004057{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004058 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004059 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004060 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004061 if (w == NULL || !PyString_Check(*pv)) {
4062 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004063 *pv = NULL;
4064 return;
4065 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004066 v = string_concat((PyStringObject *) *pv, w);
4067 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004068 *pv = v;
4069}
4070
Guido van Rossum013142a1994-08-30 08:19:36 +00004071void
Fred Drakeba096332000-07-09 07:04:36 +00004072PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004073{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004074 PyString_Concat(pv, w);
4075 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004076}
4077
4078
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004079/* The following function breaks the notion that strings are immutable:
4080 it changes the size of a string. We get away with this only if there
4081 is only one module referencing the object. You can also think of it
4082 as creating a new string object and destroying the old one, only
4083 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004084 already be known to some other part of the code...
4085 Note that if there's not enough memory to resize the string, the original
4086 string object at *pv is deallocated, *pv is set to NULL, an "out of
4087 memory" exception is set, and -1 is returned. Else (on success) 0 is
4088 returned, and the value in *pv may or may not be the same as on input.
4089 As always, an extra byte is allocated for a trailing \0 byte (newsize
4090 does *not* include that), and a trailing \0 byte is stored.
4091*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004092
4093int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004094_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004095{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004096 register PyObject *v;
4097 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004098 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004099 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4100 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004101 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004102 Py_DECREF(v);
4103 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004104 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004105 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004106 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004107 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004108 _Py_ForgetReference(v);
4109 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004110 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004111 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004112 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004113 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004114 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004115 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004116 _Py_NewReference(*pv);
4117 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004118 sv->ob_size = newsize;
4119 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004120 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004121 return 0;
4122}
Guido van Rossume5372401993-03-16 12:15:04 +00004123
4124/* Helpers for formatstring */
4125
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004126Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004127getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004128{
Thomas Wouters977485d2006-02-16 15:59:12 +00004129 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004130 if (argidx < arglen) {
4131 (*p_argidx)++;
4132 if (arglen < 0)
4133 return args;
4134 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004135 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004136 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004137 PyErr_SetString(PyExc_TypeError,
4138 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004139 return NULL;
4140}
4141
/* Format codes.  Each macro is a distinct single-bit mask, so several can
 * be OR-ed together into one int; the character next to each macro is the
 * format code it stands for.
 */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */

4154
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004155Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004156formatfloat(char *buf, size_t buflen, int flags,
4157 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004158{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004159 /* fmt = '%#.' + `prec` + `type`
4160 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004161 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004162 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004163 x = PyFloat_AsDouble(v);
4164 if (x == -1.0 && PyErr_Occurred()) {
4165 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004166 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004167 }
Guido van Rossume5372401993-03-16 12:15:04 +00004168 if (prec < 0)
4169 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004170 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4171 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004172 /* Worst case length calc to ensure no buffer overrun:
4173
4174 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004175 fmt = %#.<prec>g
4176 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004177 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004178 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004179
4180 'f' formats:
4181 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4182 len = 1 + 50 + 1 + prec = 52 + prec
4183
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004184 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004185 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004186
4187 */
4188 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4189 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004190 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004191 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004192 return -1;
4193 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004194 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4195 (flags&F_ALT) ? "#" : "",
4196 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004197 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004198 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004199}
4200
Tim Peters38fd5b62000-09-21 05:43:11 +00004201/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4202 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4203 * Python's regular ints.
4204 * Return value: a new PyString*, or NULL if error.
4205 * . *pbuf is set to point into it,
4206 * *plen set to the # of chars following that.
4207 * Caller must decref it when done using pbuf.
4208 * The string starting at *pbuf is of the form
4209 * "-"? ("0x" | "0X")? digit+
4210 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004211 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004212 * There will be at least prec digits, zero-filled on the left if
4213 * necessary to get that many.
4214 * val object to be converted
4215 * flags bitmask of format flags; only F_ALT is looked at
4216 * prec minimum number of digits; 0-fill on left if needed
4217 * type a character in [duoxX]; u acts the same as d
4218 *
4219 * CAUTION: o, x and X conversions on regular ints can never
4220 * produce a '-' sign, but can for Python's unbounded ints.
4221 */
4222PyObject*
4223_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4224 char **pbuf, int *plen)
4225{
4226 PyObject *result = NULL;
4227 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004228 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004229 int sign; /* 1 if '-', else 0 */
4230 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004231 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004232 int numdigits; /* len == numnondigits + numdigits */
4233 int numnondigits = 0;
4234
4235 switch (type) {
4236 case 'd':
4237 case 'u':
4238 result = val->ob_type->tp_str(val);
4239 break;
4240 case 'o':
4241 result = val->ob_type->tp_as_number->nb_oct(val);
4242 break;
4243 case 'x':
4244 case 'X':
4245 numnondigits = 2;
4246 result = val->ob_type->tp_as_number->nb_hex(val);
4247 break;
4248 default:
4249 assert(!"'type' not in [duoxX]");
4250 }
4251 if (!result)
4252 return NULL;
4253
4254 /* To modify the string in-place, there can only be one reference. */
4255 if (result->ob_refcnt != 1) {
4256 PyErr_BadInternalCall();
4257 return NULL;
4258 }
4259 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004260 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004261 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004262 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4263 return NULL;
4264 }
4265 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004266 if (buf[len-1] == 'L') {
4267 --len;
4268 buf[len] = '\0';
4269 }
4270 sign = buf[0] == '-';
4271 numnondigits += sign;
4272 numdigits = len - numnondigits;
4273 assert(numdigits > 0);
4274
Tim Petersfff53252001-04-12 18:38:48 +00004275 /* Get rid of base marker unless F_ALT */
4276 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004277 /* Need to skip 0x, 0X or 0. */
4278 int skipped = 0;
4279 switch (type) {
4280 case 'o':
4281 assert(buf[sign] == '0');
4282 /* If 0 is only digit, leave it alone. */
4283 if (numdigits > 1) {
4284 skipped = 1;
4285 --numdigits;
4286 }
4287 break;
4288 case 'x':
4289 case 'X':
4290 assert(buf[sign] == '0');
4291 assert(buf[sign + 1] == 'x');
4292 skipped = 2;
4293 numnondigits -= 2;
4294 break;
4295 }
4296 if (skipped) {
4297 buf += skipped;
4298 len -= skipped;
4299 if (sign)
4300 buf[0] = '-';
4301 }
4302 assert(len == numnondigits + numdigits);
4303 assert(numdigits > 0);
4304 }
4305
4306 /* Fill with leading zeroes to meet minimum width. */
4307 if (prec > numdigits) {
4308 PyObject *r1 = PyString_FromStringAndSize(NULL,
4309 numnondigits + prec);
4310 char *b1;
4311 if (!r1) {
4312 Py_DECREF(result);
4313 return NULL;
4314 }
4315 b1 = PyString_AS_STRING(r1);
4316 for (i = 0; i < numnondigits; ++i)
4317 *b1++ = *buf++;
4318 for (i = 0; i < prec - numdigits; i++)
4319 *b1++ = '0';
4320 for (i = 0; i < numdigits; i++)
4321 *b1++ = *buf++;
4322 *b1 = '\0';
4323 Py_DECREF(result);
4324 result = r1;
4325 buf = PyString_AS_STRING(result);
4326 len = numnondigits + prec;
4327 }
4328
4329 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004330 if (type == 'X') {
4331 /* Need to convert all lower case letters to upper case.
4332 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004333 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004334 if (buf[i] >= 'a' && buf[i] <= 'x')
4335 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004336 }
4337 *pbuf = buf;
4338 *plen = len;
4339 return result;
4340}
4341
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004342Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004343formatint(char *buf, size_t buflen, int flags,
4344 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004345{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004346 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004347 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4348 + 1 + 1 = 24 */
4349 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004350 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004351 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004352
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004353 x = PyInt_AsLong(v);
4354 if (x == -1 && PyErr_Occurred()) {
4355 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004356 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004357 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004358 if (x < 0 && type == 'u') {
4359 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004360 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004361 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4362 sign = "-";
4363 else
4364 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004365 if (prec < 0)
4366 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004367
4368 if ((flags & F_ALT) &&
4369 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004370 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004371 * of issues that cause pain:
4372 * - when 0 is being converted, the C standard leaves off
4373 * the '0x' or '0X', which is inconsistent with other
4374 * %#x/%#X conversions and inconsistent with Python's
4375 * hex() function
4376 * - there are platforms that violate the standard and
4377 * convert 0 with the '0x' or '0X'
4378 * (Metrowerks, Compaq Tru64)
4379 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004380 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004381 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004382 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004383 * We can achieve the desired consistency by inserting our
4384 * own '0x' or '0X' prefix, and substituting %x/%X in place
4385 * of %#x/%#X.
4386 *
4387 * Note that this is the same approach as used in
4388 * formatint() in unicodeobject.c
4389 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004390 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4391 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004392 }
4393 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004394 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4395 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004396 prec, type);
4397 }
4398
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004399 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4400 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004401 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004402 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004403 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004404 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004405 return -1;
4406 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004407 if (sign[0])
4408 PyOS_snprintf(buf, buflen, fmt, -x);
4409 else
4410 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004411 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004412}
4413
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004414Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004415formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004416{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004417 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004418 if (PyString_Check(v)) {
4419 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004420 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004421 }
4422 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004423 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004424 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004425 }
4426 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004427 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004428}
4429
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004430/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4431
4432 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4433 chars are formatted. XXX This is a magic number. Each formatting
4434 routine does bounds checking to ensure no overflow, but a better
4435 solution may be to malloc a buffer of appropriate size for each
4436 format. For now, the current solution is sufficient.
4437*/
4438#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004439
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004440PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004441PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004442{
4443 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004444 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004445 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004446 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004447 PyObject *result, *orig_args;
4448#ifdef Py_USING_UNICODE
4449 PyObject *v, *w;
4450#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004451 PyObject *dict = NULL;
4452 if (format == NULL || !PyString_Check(format) || args == NULL) {
4453 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004454 return NULL;
4455 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004456 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004457 fmt = PyString_AS_STRING(format);
4458 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004459 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004460 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004461 if (result == NULL)
4462 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004463 res = PyString_AsString(result);
4464 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004465 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004466 argidx = 0;
4467 }
4468 else {
4469 arglen = -1;
4470 argidx = -2;
4471 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004472 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4473 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004474 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004475 while (--fmtcnt >= 0) {
4476 if (*fmt != '%') {
4477 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004478 rescnt = fmtcnt + 100;
4479 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004480 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004481 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004482 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004483 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004484 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004485 }
4486 *res++ = *fmt++;
4487 }
4488 else {
4489 /* Got a format specifier */
4490 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004491 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004492 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004493 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004494 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004495 PyObject *v = NULL;
4496 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004497 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004498 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004499 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004500 char formatbuf[FORMATBUFLEN];
4501 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004502#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004503 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004504 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004505#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004506
Guido van Rossumda9c2711996-12-05 21:58:58 +00004507 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004508 if (*fmt == '(') {
4509 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004510 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004511 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004512 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004513
4514 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004515 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004516 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004517 goto error;
4518 }
4519 ++fmt;
4520 --fmtcnt;
4521 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004522 /* Skip over balanced parentheses */
4523 while (pcount > 0 && --fmtcnt >= 0) {
4524 if (*fmt == ')')
4525 --pcount;
4526 else if (*fmt == '(')
4527 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004528 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004529 }
4530 keylen = fmt - keystart - 1;
4531 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004532 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004533 "incomplete format key");
4534 goto error;
4535 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004536 key = PyString_FromStringAndSize(keystart,
4537 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004538 if (key == NULL)
4539 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004540 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004541 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004542 args_owned = 0;
4543 }
4544 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004545 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004546 if (args == NULL) {
4547 goto error;
4548 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004549 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004550 arglen = -1;
4551 argidx = -2;
4552 }
Guido van Rossume5372401993-03-16 12:15:04 +00004553 while (--fmtcnt >= 0) {
4554 switch (c = *fmt++) {
4555 case '-': flags |= F_LJUST; continue;
4556 case '+': flags |= F_SIGN; continue;
4557 case ' ': flags |= F_BLANK; continue;
4558 case '#': flags |= F_ALT; continue;
4559 case '0': flags |= F_ZERO; continue;
4560 }
4561 break;
4562 }
4563 if (c == '*') {
4564 v = getnextarg(args, arglen, &argidx);
4565 if (v == NULL)
4566 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004567 if (!PyInt_Check(v)) {
4568 PyErr_SetString(PyExc_TypeError,
4569 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004570 goto error;
4571 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004572 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004573 if (width < 0) {
4574 flags |= F_LJUST;
4575 width = -width;
4576 }
Guido van Rossume5372401993-03-16 12:15:04 +00004577 if (--fmtcnt >= 0)
4578 c = *fmt++;
4579 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004580 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004581 width = c - '0';
4582 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004583 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004584 if (!isdigit(c))
4585 break;
4586 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004587 PyErr_SetString(
4588 PyExc_ValueError,
4589 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004590 goto error;
4591 }
4592 width = width*10 + (c - '0');
4593 }
4594 }
4595 if (c == '.') {
4596 prec = 0;
4597 if (--fmtcnt >= 0)
4598 c = *fmt++;
4599 if (c == '*') {
4600 v = getnextarg(args, arglen, &argidx);
4601 if (v == NULL)
4602 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004603 if (!PyInt_Check(v)) {
4604 PyErr_SetString(
4605 PyExc_TypeError,
4606 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004607 goto error;
4608 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004609 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004610 if (prec < 0)
4611 prec = 0;
4612 if (--fmtcnt >= 0)
4613 c = *fmt++;
4614 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004615 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004616 prec = c - '0';
4617 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004618 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004619 if (!isdigit(c))
4620 break;
4621 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004622 PyErr_SetString(
4623 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004624 "prec too big");
4625 goto error;
4626 }
4627 prec = prec*10 + (c - '0');
4628 }
4629 }
4630 } /* prec */
4631 if (fmtcnt >= 0) {
4632 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004633 if (--fmtcnt >= 0)
4634 c = *fmt++;
4635 }
4636 }
4637 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004638 PyErr_SetString(PyExc_ValueError,
4639 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004640 goto error;
4641 }
4642 if (c != '%') {
4643 v = getnextarg(args, arglen, &argidx);
4644 if (v == NULL)
4645 goto error;
4646 }
4647 sign = 0;
4648 fill = ' ';
4649 switch (c) {
4650 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004651 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004652 len = 1;
4653 break;
4654 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004655#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004656 if (PyUnicode_Check(v)) {
4657 fmt = fmt_start;
4658 argidx = argidx_start;
4659 goto unicode;
4660 }
Georg Brandld45014b2005-10-01 17:06:00 +00004661#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004662 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004663#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004664 if (temp != NULL && PyUnicode_Check(temp)) {
4665 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004666 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004667 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004668 goto unicode;
4669 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004670#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004671 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004672 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004673 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004674 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004675 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004676 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004677 if (!PyString_Check(temp)) {
4678 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004679 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004680 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004681 goto error;
4682 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004683 pbuf = PyString_AS_STRING(temp);
4684 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004685 if (prec >= 0 && len > prec)
4686 len = prec;
4687 break;
4688 case 'i':
4689 case 'd':
4690 case 'u':
4691 case 'o':
4692 case 'x':
4693 case 'X':
4694 if (c == 'i')
4695 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004696 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004697 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004698 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004699 prec, c, &pbuf, &ilen);
4700 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004701 if (!temp)
4702 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004703 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004704 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004705 else {
4706 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004707 len = formatint(pbuf,
4708 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004709 flags, prec, c, v);
4710 if (len < 0)
4711 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004712 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004713 }
4714 if (flags & F_ZERO)
4715 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004716 break;
4717 case 'e':
4718 case 'E':
4719 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004720 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004721 case 'g':
4722 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004723 if (c == 'F')
4724 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004725 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004726 len = formatfloat(pbuf, sizeof(formatbuf),
4727 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004728 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004729 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004730 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004731 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004732 fill = '0';
4733 break;
4734 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004735#ifdef Py_USING_UNICODE
4736 if (PyUnicode_Check(v)) {
4737 fmt = fmt_start;
4738 argidx = argidx_start;
4739 goto unicode;
4740 }
4741#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004742 pbuf = formatbuf;
4743 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004744 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004745 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004746 break;
4747 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004748 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004749 "unsupported format character '%c' (0x%x) "
4750 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004751 c, c,
4752 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004753 goto error;
4754 }
4755 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004756 if (*pbuf == '-' || *pbuf == '+') {
4757 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004758 len--;
4759 }
4760 else if (flags & F_SIGN)
4761 sign = '+';
4762 else if (flags & F_BLANK)
4763 sign = ' ';
4764 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004765 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004766 }
4767 if (width < len)
4768 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004769 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004770 reslen -= rescnt;
4771 rescnt = width + fmtcnt + 100;
4772 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004773 if (reslen < 0) {
4774 Py_DECREF(result);
4775 return PyErr_NoMemory();
4776 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004777 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004778 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004779 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004780 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004781 }
4782 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004783 if (fill != ' ')
4784 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004785 rescnt--;
4786 if (width > len)
4787 width--;
4788 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004789 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4790 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004791 assert(pbuf[1] == c);
4792 if (fill != ' ') {
4793 *res++ = *pbuf++;
4794 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004795 }
Tim Petersfff53252001-04-12 18:38:48 +00004796 rescnt -= 2;
4797 width -= 2;
4798 if (width < 0)
4799 width = 0;
4800 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004801 }
4802 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004803 do {
4804 --rescnt;
4805 *res++ = fill;
4806 } while (--width > len);
4807 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004808 if (fill == ' ') {
4809 if (sign)
4810 *res++ = sign;
4811 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004812 (c == 'x' || c == 'X')) {
4813 assert(pbuf[0] == '0');
4814 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004815 *res++ = *pbuf++;
4816 *res++ = *pbuf++;
4817 }
4818 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004819 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004820 res += len;
4821 rescnt -= len;
4822 while (--width >= len) {
4823 --rescnt;
4824 *res++ = ' ';
4825 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004826 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004827 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004828 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004829 goto error;
4830 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004831 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004832 } /* '%' */
4833 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004834 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004835 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004836 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004837 goto error;
4838 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004839 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004840 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004841 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004842 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004843 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004844
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004845#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004846 unicode:
4847 if (args_owned) {
4848 Py_DECREF(args);
4849 args_owned = 0;
4850 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004851 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004852 if (PyTuple_Check(orig_args) && argidx > 0) {
4853 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004854 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004855 v = PyTuple_New(n);
4856 if (v == NULL)
4857 goto error;
4858 while (--n >= 0) {
4859 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4860 Py_INCREF(w);
4861 PyTuple_SET_ITEM(v, n, w);
4862 }
4863 args = v;
4864 } else {
4865 Py_INCREF(orig_args);
4866 args = orig_args;
4867 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004868 args_owned = 1;
4869 /* Take what we have of the result and let the Unicode formatting
4870 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004871 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004872 if (_PyString_Resize(&result, rescnt))
4873 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004874 fmtcnt = PyString_GET_SIZE(format) - \
4875 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004876 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4877 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004878 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004879 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004880 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004881 if (v == NULL)
4882 goto error;
4883 /* Paste what we have (result) to what the Unicode formatting
4884 function returned (v) and return the result (or error) */
4885 w = PyUnicode_Concat(result, v);
4886 Py_DECREF(result);
4887 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004888 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004889 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004890#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004891
Guido van Rossume5372401993-03-16 12:15:04 +00004892 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004893 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004894 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004895 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004896 }
Guido van Rossume5372401993-03-16 12:15:04 +00004897 return NULL;
4898}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004899
Guido van Rossum2a61e741997-01-18 07:55:05 +00004900void
Fred Drakeba096332000-07-09 07:04:36 +00004901PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004902{
4903 register PyStringObject *s = (PyStringObject *)(*p);
4904 PyObject *t;
4905 if (s == NULL || !PyString_Check(s))
4906 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004907 /* If it's a string subclass, we don't really know what putting
4908 it in the interned dict might do. */
4909 if (!PyString_CheckExact(s))
4910 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004911 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004912 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004913 if (interned == NULL) {
4914 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004915 if (interned == NULL) {
4916 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004917 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004918 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004919 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004920 t = PyDict_GetItem(interned, (PyObject *)s);
4921 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004922 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004923 Py_DECREF(*p);
4924 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004925 return;
4926 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004927
Armin Rigo79f7ad22004-08-07 19:27:39 +00004928 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004929 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004930 return;
4931 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004932 /* The two references in interned are not counted by refcnt.
4933 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004934 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004935 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004936}
4937
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004938void
4939PyString_InternImmortal(PyObject **p)
4940{
4941 PyString_InternInPlace(p);
4942 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4943 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4944 Py_INCREF(*p);
4945 }
4946}
4947
Guido van Rossum2a61e741997-01-18 07:55:05 +00004948
4949PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004950PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004951{
4952 PyObject *s = PyString_FromString(cp);
4953 if (s == NULL)
4954 return NULL;
4955 PyString_InternInPlace(&s);
4956 return s;
4957}
4958
Guido van Rossum8cf04761997-08-02 02:57:45 +00004959void
Fred Drakeba096332000-07-09 07:04:36 +00004960PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004961{
4962 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004963 for (i = 0; i < UCHAR_MAX + 1; i++) {
4964 Py_XDECREF(characters[i]);
4965 characters[i] = NULL;
4966 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004967 Py_XDECREF(nullstring);
4968 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004969}
Barry Warsawa903ad982001-02-23 16:40:48 +00004970
Barry Warsawa903ad982001-02-23 16:40:48 +00004971void _Py_ReleaseInternedStrings(void)
4972{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004973 PyObject *keys;
4974 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004975 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004976
4977 if (interned == NULL || !PyDict_Check(interned))
4978 return;
4979 keys = PyDict_Keys(interned);
4980 if (keys == NULL || !PyList_Check(keys)) {
4981 PyErr_Clear();
4982 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004983 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004984
4985 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4986 detector, interned strings are not forcibly deallocated; rather, we
4987 give them their stolen references back, and then clear and DECREF
4988 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004989
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004990 fprintf(stderr, "releasing interned strings\n");
4991 n = PyList_GET_SIZE(keys);
4992 for (i = 0; i < n; i++) {
4993 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4994 switch (s->ob_sstate) {
4995 case SSTATE_NOT_INTERNED:
4996 /* XXX Shouldn't happen */
4997 break;
4998 case SSTATE_INTERNED_IMMORTAL:
4999 s->ob_refcnt += 1;
5000 break;
5001 case SSTATE_INTERNED_MORTAL:
5002 s->ob_refcnt += 2;
5003 break;
5004 default:
5005 Py_FatalError("Inconsistent interned string state.");
5006 }
5007 s->ob_sstate = SSTATE_NOT_INTERNED;
5008 }
5009 Py_DECREF(keys);
5010 PyDict_Clear(interned);
5011 Py_DECREF(interned);
5012 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005013}