blob: 91f010348615a80d871a697ee1df63fc8365c2c7 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000692/* -------------------------------------------------------------------- */
693/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
Fredrik Lundhaf722372006-05-25 17:55:31 +0000769/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000770/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771
Fredrik Lundha50d2012006-05-26 17:04:58 +0000772#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000773
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000774#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000778
Fredrik Lundhb9479482006-05-26 17:22:38 +0000779#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000780
Fredrik Lundha50d2012006-05-26 17:04:58 +0000781#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000782
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000783#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000785#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000786
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000807#ifdef __VMS
808 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
809#else
810 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
811#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000812 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000814
Thomas Wouters7e474022000-07-16 12:04:32 +0000815 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000816 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000817 if (memchr(op->ob_sval, '\'', op->ob_size) &&
818 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000819 quote = '"';
820
821 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822 for (i = 0; i < op->ob_size; i++) {
823 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000824 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000825 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000826 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000827 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000828 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000829 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000830 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000831 fprintf(fp, "\\r");
832 else if (c < ' ' || c >= 0x7f)
833 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000834 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000835 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000836 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000837 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000838 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000839}
840
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000841PyObject *
842PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000844 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000845 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000846 PyObject *v;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000847 if (newsize > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000848 PyErr_SetString(PyExc_OverflowError,
849 "string is too large to make repr");
850 }
851 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000852 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000853 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000854 }
855 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000856 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 register char c;
858 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000859 int quote;
860
Thomas Wouters7e474022000-07-16 12:04:32 +0000861 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000862 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000863 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000864 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000865 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000866 quote = '"';
867
Tim Peters9161c8b2001-12-03 01:55:38 +0000868 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000869 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000871 /* There's at least enough room for a hex escape
872 and a closing quote. */
873 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000875 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000876 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000877 else if (c == '\t')
878 *p++ = '\\', *p++ = 't';
879 else if (c == '\n')
880 *p++ = '\\', *p++ = 'n';
881 else if (c == '\r')
882 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000883 else if (c < ' ' || c >= 0x7f) {
884 /* For performance, we don't want to call
885 PyOS_snprintf here (extra layers of
886 function call). */
887 sprintf(p, "\\x%02x", c & 0xff);
888 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000889 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000890 else
891 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000892 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000893 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000894 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000896 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000897 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000898 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000900}
901
Guido van Rossum189f1df2001-05-01 16:51:53 +0000902static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000903string_repr(PyObject *op)
904{
905 return PyString_Repr(op, 1);
906}
907
908static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000909string_str(PyObject *s)
910{
Tim Petersc9933152001-10-16 20:18:24 +0000911 assert(PyString_Check(s));
912 if (PyString_CheckExact(s)) {
913 Py_INCREF(s);
914 return s;
915 }
916 else {
917 /* Subtype -- return genuine string with the same value. */
918 PyStringObject *t = (PyStringObject *) s;
919 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
920 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921}
922
Martin v. Löwis18e16552006-02-15 17:27:45 +0000923static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000924string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000925{
926 return a->ob_size;
927}
928
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000929static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000930string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000931{
Andrew Dalke598710c2006-05-25 18:18:39 +0000932 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000933 register PyStringObject *op;
934 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000935#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000936 if (PyUnicode_Check(bb))
937 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000938#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000939 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000940 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000941 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000942 return NULL;
943 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000944#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000946 if ((a->ob_size == 0 || b->ob_size == 0) &&
947 PyString_CheckExact(a) && PyString_CheckExact(b)) {
948 if (a->ob_size == 0) {
949 Py_INCREF(bb);
950 return bb;
951 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952 Py_INCREF(a);
953 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 }
955 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000956 if (size < 0) {
957 PyErr_SetString(PyExc_OverflowError,
958 "strings are too large to concat");
959 return NULL;
960 }
961
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000962 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000963 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000964 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000965 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000966 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000967 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000968 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000969 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
970 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000971 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000973#undef b
974}
975
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000976static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000977string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000978{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000979 register Py_ssize_t i;
980 register Py_ssize_t j;
981 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000982 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000983 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000984 if (n < 0)
985 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000986 /* watch out for overflows: the size can overflow int,
987 * and the # of bytes needed can overflow size_t
988 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000989 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000990 if (n && size / n != a->ob_size) {
991 PyErr_SetString(PyExc_OverflowError,
992 "repeated string is too long");
993 return NULL;
994 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000995 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996 Py_INCREF(a);
997 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998 }
Tim Peterse7c05322004-06-27 17:24:49 +0000999 nbytes = (size_t)size;
1000 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001001 PyErr_SetString(PyExc_OverflowError,
1002 "repeated string is too long");
1003 return NULL;
1004 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001006 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001007 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001008 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001009 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001010 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001011 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001012 op->ob_sval[size] = '\0';
1013 if (a->ob_size == 1 && n > 0) {
1014 memset(op->ob_sval, a->ob_sval[0] , n);
1015 return (PyObject *) op;
1016 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001017 i = 0;
1018 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001019 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001020 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001021 }
1022 while (i < size) {
1023 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001024 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001025 i += j;
1026 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001027 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001028}
1029
1030/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1031
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001032static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001033string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001035 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001036{
1037 if (i < 0)
1038 i = 0;
1039 if (j < 0)
1040 j = 0; /* Avoid signed/unsigned bug in next line */
1041 if (j > a->ob_size)
1042 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001043 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1044 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001045 Py_INCREF(a);
1046 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001047 }
1048 if (j < i)
1049 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001050 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
Guido van Rossum9284a572000-03-07 15:53:43 +00001053static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001054string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001055{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001056 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001057#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001058 if (PyUnicode_Check(sub_obj))
1059 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001060#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001061 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001062 PyErr_SetString(PyExc_TypeError,
1063 "'in <string>' requires string as left operand");
1064 return -1;
1065 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001066 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001067
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001068 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001069}
1070
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001071static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001072string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001073{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001074 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001075 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001076 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001077 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001078 return NULL;
1079 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001080 pchar = a->ob_sval[i];
1081 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001082 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001083 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001084 else {
1085#ifdef COUNT_ALLOCS
1086 one_strings++;
1087#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001088 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001089 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001090 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091}
1092
Martin v. Löwiscd353062001-05-24 16:56:35 +00001093static PyObject*
1094string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001095{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001096 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001097 Py_ssize_t len_a, len_b;
1098 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001099 PyObject *result;
1100
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001101 /* Make sure both arguments are strings. */
1102 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001103 result = Py_NotImplemented;
1104 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001105 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001106 if (a == b) {
1107 switch (op) {
1108 case Py_EQ:case Py_LE:case Py_GE:
1109 result = Py_True;
1110 goto out;
1111 case Py_NE:case Py_LT:case Py_GT:
1112 result = Py_False;
1113 goto out;
1114 }
1115 }
1116 if (op == Py_EQ) {
1117 /* Supporting Py_NE here as well does not save
1118 much time, since Py_NE is rarely used. */
1119 if (a->ob_size == b->ob_size
1120 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001121 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001122 a->ob_size) == 0)) {
1123 result = Py_True;
1124 } else {
1125 result = Py_False;
1126 }
1127 goto out;
1128 }
1129 len_a = a->ob_size; len_b = b->ob_size;
1130 min_len = (len_a < len_b) ? len_a : len_b;
1131 if (min_len > 0) {
1132 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1133 if (c==0)
1134 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1135 }else
1136 c = 0;
1137 if (c == 0)
1138 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139 switch (op) {
1140 case Py_LT: c = c < 0; break;
1141 case Py_LE: c = c <= 0; break;
1142 case Py_EQ: assert(0); break; /* unreachable */
1143 case Py_NE: c = c != 0; break;
1144 case Py_GT: c = c > 0; break;
1145 case Py_GE: c = c >= 0; break;
1146 default:
1147 result = Py_NotImplemented;
1148 goto out;
1149 }
1150 result = c ? Py_True : Py_False;
1151 out:
1152 Py_INCREF(result);
1153 return result;
1154}
1155
1156int
1157_PyString_Eq(PyObject *o1, PyObject *o2)
1158{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001159 PyStringObject *a = (PyStringObject*) o1;
1160 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001161 return a->ob_size == b->ob_size
1162 && *a->ob_sval == *b->ob_sval
1163 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001164}
1165
Guido van Rossum9bfef441993-03-29 10:43:31 +00001166static long
Fred Drakeba096332000-07-09 07:04:36 +00001167string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001168{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001169 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001170 register unsigned char *p;
1171 register long x;
1172
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001173 if (a->ob_shash != -1)
1174 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001175 len = a->ob_size;
1176 p = (unsigned char *) a->ob_sval;
1177 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001178 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001179 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180 x ^= a->ob_size;
1181 if (x == -1)
1182 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001183 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001184 return x;
1185}
1186
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001187#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1188
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001189static PyObject*
1190string_subscript(PyStringObject* self, PyObject* item)
1191{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001192 PyNumberMethods *nb = item->ob_type->tp_as_number;
1193 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1194 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001195 if (i == -1 && PyErr_Occurred())
1196 return NULL;
1197 if (i < 0)
1198 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001199 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001200 }
1201 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001202 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001203 char* source_buf;
1204 char* result_buf;
1205 PyObject* result;
1206
Tim Petersae1d0c92006-03-17 03:29:34 +00001207 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001208 PyString_GET_SIZE(self),
1209 &start, &stop, &step, &slicelength) < 0) {
1210 return NULL;
1211 }
1212
1213 if (slicelength <= 0) {
1214 return PyString_FromStringAndSize("", 0);
1215 }
1216 else {
1217 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001218 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001219 if (result_buf == NULL)
1220 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001221
Tim Petersae1d0c92006-03-17 03:29:34 +00001222 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 cur += step, i++) {
1224 result_buf[i] = source_buf[cur];
1225 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001226
1227 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 slicelength);
1229 PyMem_Free(result_buf);
1230 return result;
1231 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001232 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001233 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001234 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001235 "string indices must be integers");
1236 return NULL;
1237 }
1238}
1239
Martin v. Löwis18e16552006-02-15 17:27:45 +00001240static Py_ssize_t
1241string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001242{
1243 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001244 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001245 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001246 return -1;
1247 }
1248 *ptr = (void *)self->ob_sval;
1249 return self->ob_size;
1250}
1251
Martin v. Löwis18e16552006-02-15 17:27:45 +00001252static Py_ssize_t
1253string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254{
Guido van Rossum045e6881997-09-08 18:30:11 +00001255 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001256 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257 return -1;
1258}
1259
Martin v. Löwis18e16552006-02-15 17:27:45 +00001260static Py_ssize_t
1261string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001262{
1263 if ( lenp )
1264 *lenp = self->ob_size;
1265 return 1;
1266}
1267
Martin v. Löwis18e16552006-02-15 17:27:45 +00001268static Py_ssize_t
1269string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001270{
1271 if ( index != 0 ) {
1272 PyErr_SetString(PyExc_SystemError,
1273 "accessing non-existent string segment");
1274 return -1;
1275 }
1276 *ptr = self->ob_sval;
1277 return self->ob_size;
1278}
1279
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001280static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001281 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001282 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001283 (ssizeargfunc)string_repeat, /*sq_repeat*/
1284 (ssizeargfunc)string_item, /*sq_item*/
1285 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001286 0, /*sq_ass_item*/
1287 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001288 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001289};
1290
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001291static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001293 (binaryfunc)string_subscript,
1294 0,
1295};
1296
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001298 (readbufferproc)string_buffer_getreadbuf,
1299 (writebufferproc)string_buffer_getwritebuf,
1300 (segcountproc)string_buffer_getsegcount,
1301 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001302};
1303
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304
1305
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip kind: the format string without its
   3-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* True when the `length' bytes of `target' starting at `offset' equal
   `pattern'.  The first and last bytes are compared directly before
   memcmp() checks the rest.  Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length) \
  ((target)[(offset)] == (pattern)[0] && \
   (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
   !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append data[left:right] to `list' as a new string.  Relies on the
   caller's `str' and `list' variables and its `onError' label. */
#define SPLIT_APPEND(data, left, right) \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Append(list, str)) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    else \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but stores directly into the preallocated slots
   while `count' is below MAX_PREALLOC, and increments the caller's
   `count' afterwards. */
#define SPLIT_ADD(data, left, right) { \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (count < MAX_PREALLOC) { \
        PyList_SET_ITEM(list, count, str); \
    } else { \
        if (PyList_Append(list, str)) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        else \
            Py_DECREF(str); \
    } \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count

/* Index-advancing scanners; `i' must be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1372
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001373Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001374split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375{
Andrew Dalke525eab32006-05-26 14:00:45 +00001376 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001377 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001378 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379
1380 if (list == NULL)
1381 return NULL;
1382
Andrew Dalke02758d62006-05-26 15:21:01 +00001383 i = j = 0;
1384
1385 while (maxsplit-- > 0) {
1386 SKIP_SPACE(s, i, len);
1387 if (i==len) break;
1388 j = i; i++;
1389 SKIP_NONSPACE(s, i, len);
1390 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001392
1393 if (i < len) {
1394 /* Only occurs when maxsplit was reached */
1395 /* Skip any remaining whitespace and copy to end of string */
1396 SKIP_SPACE(s, i, len);
1397 if (i != len)
1398 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001399 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001400 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001401 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001402 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 Py_DECREF(list);
1404 return NULL;
1405}
1406
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001407Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001408split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001409{
Andrew Dalke525eab32006-05-26 14:00:45 +00001410 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001411 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001412 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001413
1414 if (list == NULL)
1415 return NULL;
1416
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001417 i = j = 0;
1418 while ((j < len) && (maxcount-- > 0)) {
1419 for(; j<len; j++) {
1420 /* I found that using memchr makes no difference */
1421 if (s[j] == ch) {
1422 SPLIT_ADD(s, i, j);
1423 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001424 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001425 }
1426 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001427 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001428 if (i <= len) {
1429 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001431 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001432 return list;
1433
1434 onError:
1435 Py_DECREF(list);
1436 return NULL;
1437}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001438
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001439PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001440"S.split([sep [,maxsplit]]) -> list of strings\n\
1441\n\
1442Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001443delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001444splits are done. If sep is not specified or is None, any\n\
1445whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001446
1447static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001448string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001449{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001450 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001451 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001452 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001453 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001454#ifdef USE_FAST
1455 Py_ssize_t pos;
1456#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457
Martin v. Löwis9c830762006-04-13 08:37:17 +00001458 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001460 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001461 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001462 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001463 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001464 if (PyString_Check(subobj)) {
1465 sub = PyString_AS_STRING(subobj);
1466 n = PyString_GET_SIZE(subobj);
1467 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001468#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001469 else if (PyUnicode_Check(subobj))
1470 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001471#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1473 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001474
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475 if (n == 0) {
1476 PyErr_SetString(PyExc_ValueError, "empty separator");
1477 return NULL;
1478 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001479 else if (n == 1)
1480 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481
Andrew Dalke525eab32006-05-26 14:00:45 +00001482 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001483 if (list == NULL)
1484 return NULL;
1485
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001486#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001488 while (maxsplit-- > 0) {
1489 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1490 if (pos < 0)
1491 break;
1492 j = i+pos;
1493 SPLIT_ADD(s, i, j);
1494 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001496#else
1497 i = j = 0;
1498 while ((j+n <= len) && (maxsplit-- > 0)) {
1499 for (; j+n <= len; j++) {
1500 if (Py_STRING_MATCH(s, j, sub, n)) {
1501 SPLIT_ADD(s, i, j);
1502 i = j = j + n;
1503 break;
1504 }
1505 }
1506 }
1507#endif
1508 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001509 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510 return list;
1511
Andrew Dalke525eab32006-05-26 14:00:45 +00001512 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513 Py_DECREF(list);
1514 return NULL;
1515}
1516
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001517PyDoc_STRVAR(partition__doc__,
1518"S.partition(sep) -> (head, sep, tail)\n\
1519\n\
1520Searches for the separator sep in S, and returns the part before it,\n\
1521the separator itself, and the part after it. If the separator is not\n\
1522found, returns S and two empty strings.");
1523
1524static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001525string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001526{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001527 const char *sep;
1528 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001529
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001530 if (PyString_Check(sep_obj)) {
1531 sep = PyString_AS_STRING(sep_obj);
1532 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001533 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001534#ifdef Py_USING_UNICODE
1535 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001536 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001537#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001538 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001539 return NULL;
1540
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001541 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001542 (PyObject*) self,
1543 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1544 sep_obj, sep, sep_len
1545 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001546}
1547
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001548PyDoc_STRVAR(rpartition__doc__,
1549"S.rpartition(sep) -> (head, sep, tail)\n\
1550\n\
1551Searches for the separator sep in S, starting at the end of S, and returns\n\
1552the part before it, the separator itself, and the part after it. If the\n\
1553separator is not found, returns S and two empty strings.");
1554
1555static PyObject *
1556string_rpartition(PyStringObject *self, PyObject *sep_obj)
1557{
1558 const char *sep;
1559 Py_ssize_t sep_len;
1560
1561 if (PyString_Check(sep_obj)) {
1562 sep = PyString_AS_STRING(sep_obj);
1563 sep_len = PyString_GET_SIZE(sep_obj);
1564 }
1565#ifdef Py_USING_UNICODE
1566 else if (PyUnicode_Check(sep_obj))
1567 return PyUnicode_Partition((PyObject *) self, sep_obj);
1568#endif
1569 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1570 return NULL;
1571
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001572 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001573 (PyObject*) self,
1574 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1575 sep_obj, sep, sep_len
1576 );
1577}
1578
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001579Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001580rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001581{
Andrew Dalke525eab32006-05-26 14:00:45 +00001582 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001583 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001584 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001585
1586 if (list == NULL)
1587 return NULL;
1588
Andrew Dalke02758d62006-05-26 15:21:01 +00001589 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001590
Andrew Dalke02758d62006-05-26 15:21:01 +00001591 while (maxsplit-- > 0) {
1592 RSKIP_SPACE(s, i);
1593 if (i<0) break;
1594 j = i; i--;
1595 RSKIP_NONSPACE(s, i);
1596 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001598 if (i >= 0) {
1599 /* Only occurs when maxsplit was reached */
1600 /* Skip any remaining whitespace and copy to beginning of string */
1601 RSKIP_SPACE(s, i);
1602 if (i >= 0)
1603 SPLIT_ADD(s, 0, i + 1);
1604
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001605 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001606 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001607 if (PyList_Reverse(list) < 0)
1608 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001609 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001610 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001611 Py_DECREF(list);
1612 return NULL;
1613}
1614
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001615Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001616rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001617{
Andrew Dalke525eab32006-05-26 14:00:45 +00001618 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001619 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001620 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001621
1622 if (list == NULL)
1623 return NULL;
1624
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001625 i = j = len - 1;
1626 while ((i >= 0) && (maxcount-- > 0)) {
1627 for (; i >= 0; i--) {
1628 if (s[i] == ch) {
1629 SPLIT_ADD(s, i + 1, j + 1);
1630 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001632 }
1633 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001634 }
1635 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001636 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001637 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001638 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001639 if (PyList_Reverse(list) < 0)
1640 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001641 return list;
1642
1643 onError:
1644 Py_DECREF(list);
1645 return NULL;
1646}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001647
1648PyDoc_STRVAR(rsplit__doc__,
1649"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1650\n\
1651Return a list of the words in the string S, using sep as the\n\
1652delimiter string, starting at the end of the string and working\n\
1653to the front. If maxsplit is given, at most maxsplit splits are\n\
1654done. If sep is not specified or is None, any whitespace string\n\
1655is a separator.");
1656
1657static PyObject *
1658string_rsplit(PyStringObject *self, PyObject *args)
1659{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001660 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001661 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001662 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001663 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001664
Martin v. Löwis9c830762006-04-13 08:37:17 +00001665 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001666 return NULL;
1667 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001668 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001669 if (subobj == Py_None)
1670 return rsplit_whitespace(s, len, maxsplit);
1671 if (PyString_Check(subobj)) {
1672 sub = PyString_AS_STRING(subobj);
1673 n = PyString_GET_SIZE(subobj);
1674 }
1675#ifdef Py_USING_UNICODE
1676 else if (PyUnicode_Check(subobj))
1677 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1678#endif
1679 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1680 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001681
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001682 if (n == 0) {
1683 PyErr_SetString(PyExc_ValueError, "empty separator");
1684 return NULL;
1685 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001686 else if (n == 1)
1687 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001688
Andrew Dalke525eab32006-05-26 14:00:45 +00001689 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001690 if (list == NULL)
1691 return NULL;
1692
1693 j = len;
1694 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001695
1696 while ( (i >= 0) && (maxsplit-- > 0) ) {
1697 for (; i>=0; i--) {
1698 if (Py_STRING_MATCH(s, i, sub, n)) {
1699 SPLIT_ADD(s, i + n, j);
1700 j = i;
1701 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001702 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001703 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001704 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001705 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001706 SPLIT_ADD(s, 0, j);
1707 FIX_PREALLOC_SIZE(list);
1708 if (PyList_Reverse(list) < 0)
1709 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001710 return list;
1711
Andrew Dalke525eab32006-05-26 14:00:45 +00001712onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001713 Py_DECREF(list);
1714 return NULL;
1715}
1716
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001717
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001718PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719"S.join(sequence) -> string\n\
1720\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001721Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001722sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001723
1724static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001725string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001726{
1727 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001728 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001731 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001732 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001733 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001734 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735
Tim Peters19fe14e2001-01-19 03:03:47 +00001736 seq = PySequence_Fast(orig, "");
1737 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001738 return NULL;
1739 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001740
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001741 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001742 if (seqlen == 0) {
1743 Py_DECREF(seq);
1744 return PyString_FromString("");
1745 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001747 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001748 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1749 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001750 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001751 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001752 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001753 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001754
Raymond Hettinger674f2412004-08-23 23:23:54 +00001755 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001756 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001757 * Do a pre-pass to figure out the total amount of space we'll
1758 * need (sz), see whether any argument is absurd, and defer to
1759 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001760 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001761 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001763 item = PySequence_Fast_GET_ITEM(seq, i);
1764 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001765#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001766 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001767 /* Defer to Unicode join.
1768 * CAUTION: There's no gurantee that the
1769 * original sequence can be iterated over
1770 * again, so we must pass seq here.
1771 */
1772 PyObject *result;
1773 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001774 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001775 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001776 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001777#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001779 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001780 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001781 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001782 Py_DECREF(seq);
1783 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001784 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001785 sz += PyString_GET_SIZE(item);
1786 if (i != 0)
1787 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001788 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001789 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001790 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001791 Py_DECREF(seq);
1792 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 }
1795
1796 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001797 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001798 if (res == NULL) {
1799 Py_DECREF(seq);
1800 return NULL;
1801 }
1802
1803 /* Catenate everything. */
1804 p = PyString_AS_STRING(res);
1805 for (i = 0; i < seqlen; ++i) {
1806 size_t n;
1807 item = PySequence_Fast_GET_ITEM(seq, i);
1808 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001809 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 p += n;
1811 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001812 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001813 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001814 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001815 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001816
Jeremy Hylton49048292000-07-11 03:28:17 +00001817 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819}
1820
Tim Peters52e155e2001-06-16 05:42:57 +00001821PyObject *
1822_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001823{
Tim Petersa7259592001-06-16 05:11:17 +00001824 assert(sep != NULL && PyString_Check(sep));
1825 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001826 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001827}
1828
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001829Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001830string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001831{
1832 if (*end > len)
1833 *end = len;
1834 else if (*end < 0)
1835 *end += len;
1836 if (*end < 0)
1837 *end = 0;
1838 if (*start < 0)
1839 *start += len;
1840 if (*start < 0)
1841 *start = 0;
1842}
1843
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001844Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001845string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001847 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001848 const char *sub;
1849 Py_ssize_t sub_len;
1850 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001852 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1853 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001854 return -2;
1855 if (PyString_Check(subobj)) {
1856 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001857 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001859#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001860 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001861 return PyUnicode_Find(
1862 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001863#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001864 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001865 /* XXX - the "expected a character buffer object" is pretty
1866 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867 return -2;
1868
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001869 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001870 return stringlib_find_slice(
1871 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1872 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001873 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001874 return stringlib_rfind_slice(
1875 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1876 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877}
1878
1879
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001880PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001881"S.find(sub [,start [,end]]) -> int\n\
1882\n\
1883Return the lowest index in S where substring sub is found,\n\
1884such that sub is contained within s[start,end]. Optional\n\
1885arguments start and end are interpreted as in slice notation.\n\
1886\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001887Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001888
1889static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001890string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001891{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001892 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893 if (result == -2)
1894 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001895 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896}
1897
1898
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001899PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900"S.index(sub [,start [,end]]) -> int\n\
1901\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001902Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903
1904static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001905string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 if (result == -2)
1909 return NULL;
1910 if (result == -1) {
1911 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001912 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913 return NULL;
1914 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001915 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001916}
1917
1918
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001919PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920"S.rfind(sub [,start [,end]]) -> int\n\
1921\n\
1922Return the highest index in S where substring sub is found,\n\
1923such that sub is contained within s[start,end]. Optional\n\
1924arguments start and end are interpreted as in slice notation.\n\
1925\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001926Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927
1928static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001929string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001931 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932 if (result == -2)
1933 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001934 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001935}
1936
1937
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001938PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939"S.rindex(sub [,start [,end]]) -> int\n\
1940\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001941Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942
1943static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001944string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001945{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001946 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947 if (result == -2)
1948 return NULL;
1949 if (result == -1) {
1950 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001951 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952 return NULL;
1953 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001954 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955}
1956
1957
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001958Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001959do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1960{
1961 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001962 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001963 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001964 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1965 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001966
1967 i = 0;
1968 if (striptype != RIGHTSTRIP) {
1969 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1970 i++;
1971 }
1972 }
1973
1974 j = len;
1975 if (striptype != LEFTSTRIP) {
1976 do {
1977 j--;
1978 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1979 j++;
1980 }
1981
1982 if (i == 0 && j == len && PyString_CheckExact(self)) {
1983 Py_INCREF(self);
1984 return (PyObject*)self;
1985 }
1986 else
1987 return PyString_FromStringAndSize(s+i, j-i);
1988}
1989
1990
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001991Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001992do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001993{
1994 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001995 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001997 i = 0;
1998 if (striptype != RIGHTSTRIP) {
1999 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2000 i++;
2001 }
2002 }
2003
2004 j = len;
2005 if (striptype != LEFTSTRIP) {
2006 do {
2007 j--;
2008 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2009 j++;
2010 }
2011
Tim Peters8fa5dd02001-09-12 02:18:30 +00002012 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013 Py_INCREF(self);
2014 return (PyObject*)self;
2015 }
2016 else
2017 return PyString_FromStringAndSize(s+i, j-i);
2018}
2019
2020
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002021Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002022do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2023{
2024 PyObject *sep = NULL;
2025
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002026 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002027 return NULL;
2028
2029 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002030 if (PyString_Check(sep))
2031 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002032#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002033 else if (PyUnicode_Check(sep)) {
2034 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2035 PyObject *res;
2036 if (uniself==NULL)
2037 return NULL;
2038 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2039 striptype, sep);
2040 Py_DECREF(uniself);
2041 return res;
2042 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002043#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002044 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002045#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002046 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002047#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002048 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002049#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002050 STRIPNAME(striptype));
2051 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002052 }
2053
2054 return do_strip(self, striptype);
2055}
2056
2057
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002058PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002059"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002060\n\
2061Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002062whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002063If chars is given and not None, remove characters in chars instead.\n\
2064If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002065
2066static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002067string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002068{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002069 if (PyTuple_GET_SIZE(args) == 0)
2070 return do_strip(self, BOTHSTRIP); /* Common case */
2071 else
2072 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002073}
2074
2075
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002076PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002077"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002078\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002080If chars is given and not None, remove characters in chars instead.\n\
2081If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082
2083static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002084string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002086 if (PyTuple_GET_SIZE(args) == 0)
2087 return do_strip(self, LEFTSTRIP); /* Common case */
2088 else
2089 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090}
2091
2092
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002093PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002094"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002097If chars is given and not None, remove characters in chars instead.\n\
2098If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099
2100static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002101string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002103 if (PyTuple_GET_SIZE(args) == 0)
2104 return do_strip(self, RIGHTSTRIP); /* Common case */
2105 else
2106 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107}
2108
2109
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002110PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111"S.lower() -> string\n\
2112\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002113Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002115/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2116#ifndef _tolower
2117#define _tolower tolower
2118#endif
2119
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002121string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002122{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002123 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002124 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002125 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002127 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002128 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002130
2131 s = PyString_AS_STRING(newobj);
2132
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002133 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002134
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002135 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002136 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002137 if (isupper(c))
2138 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002140
Anthony Baxtera6286212006-04-11 07:42:36 +00002141 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142}
2143
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002144PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145"S.upper() -> string\n\
2146\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002147Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002149#ifndef _toupper
2150#define _toupper toupper
2151#endif
2152
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002154string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002155{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002156 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002157 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002158 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002160 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002161 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002162 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002163
2164 s = PyString_AS_STRING(newobj);
2165
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002166 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002167
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002169 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002170 if (islower(c))
2171 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002173
Anthony Baxtera6286212006-04-11 07:42:36 +00002174 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002175}
2176
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002177PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002178"S.title() -> string\n\
2179\n\
2180Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002181characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002182
2183static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002184string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002185{
2186 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002187 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002189 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190
Anthony Baxtera6286212006-04-11 07:42:36 +00002191 newobj = PyString_FromStringAndSize(NULL, n);
2192 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002194 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195 for (i = 0; i < n; i++) {
2196 int c = Py_CHARMASK(*s++);
2197 if (islower(c)) {
2198 if (!previous_is_cased)
2199 c = toupper(c);
2200 previous_is_cased = 1;
2201 } else if (isupper(c)) {
2202 if (previous_is_cased)
2203 c = tolower(c);
2204 previous_is_cased = 1;
2205 } else
2206 previous_is_cased = 0;
2207 *s_new++ = c;
2208 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002209 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210}
2211
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002212PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213"S.capitalize() -> string\n\
2214\n\
2215Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002216capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002217
2218static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002219string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002220{
2221 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002222 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002223 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224
Anthony Baxtera6286212006-04-11 07:42:36 +00002225 newobj = PyString_FromStringAndSize(NULL, n);
2226 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002228 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229 if (0 < n) {
2230 int c = Py_CHARMASK(*s++);
2231 if (islower(c))
2232 *s_new = toupper(c);
2233 else
2234 *s_new = c;
2235 s_new++;
2236 }
2237 for (i = 1; i < n; i++) {
2238 int c = Py_CHARMASK(*s++);
2239 if (isupper(c))
2240 *s_new = tolower(c);
2241 else
2242 *s_new = c;
2243 s_new++;
2244 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002245 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246}
2247
2248
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002249PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002250"S.count(sub[, start[, end]]) -> int\n\
2251\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002252Return the number of non-overlapping occurrences of substring sub in\n\
2253string S[start:end]. Optional arguments start and end are interpreted\n\
2254as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002255
2256static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002257string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002259 PyObject *sub_obj;
2260 const char *str = PyString_AS_STRING(self), *sub;
2261 Py_ssize_t sub_len;
2262 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002264 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2265 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002266 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002267
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002268 if (PyString_Check(sub_obj)) {
2269 sub = PyString_AS_STRING(sub_obj);
2270 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002271 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002272#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002273 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002274 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002275 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002276 if (count == -1)
2277 return NULL;
2278 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002279 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002280 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002281#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002282 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 return NULL;
2284
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002285 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002286
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002287 return PyInt_FromSsize_t(
2288 stringlib_count(str + start, end - start, sub, sub_len)
2289 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002290}
2291
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002292PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002293"S.swapcase() -> string\n\
2294\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002296converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002297
2298static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002299string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002300{
2301 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002302 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002303 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002304
Anthony Baxtera6286212006-04-11 07:42:36 +00002305 newobj = PyString_FromStringAndSize(NULL, n);
2306 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002308 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309 for (i = 0; i < n; i++) {
2310 int c = Py_CHARMASK(*s++);
2311 if (islower(c)) {
2312 *s_new = toupper(c);
2313 }
2314 else if (isupper(c)) {
2315 *s_new = tolower(c);
2316 }
2317 else
2318 *s_new = c;
2319 s_new++;
2320 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002321 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002322}
2323
2324
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002325PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326"S.translate(table [,deletechars]) -> string\n\
2327\n\
2328Return a copy of the string S, where all characters occurring\n\
2329in the optional argument deletechars are removed, and the\n\
2330remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002331translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332
2333static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002334string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002335{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002336 register char *input, *output;
2337 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002338 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002340 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002341 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002342 PyObject *result;
2343 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002344 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002345
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002346 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002347 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002348 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349
2350 if (PyString_Check(tableobj)) {
2351 table1 = PyString_AS_STRING(tableobj);
2352 tablen = PyString_GET_SIZE(tableobj);
2353 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002354#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002355 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002356 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002357 parameter; instead a mapping to None will cause characters
2358 to be deleted. */
2359 if (delobj != NULL) {
2360 PyErr_SetString(PyExc_TypeError,
2361 "deletions are implemented differently for unicode");
2362 return NULL;
2363 }
2364 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2365 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002366#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002367 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002368 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369
Martin v. Löwis00b61272002-12-12 20:03:19 +00002370 if (tablen != 256) {
2371 PyErr_SetString(PyExc_ValueError,
2372 "translation table must be 256 characters long");
2373 return NULL;
2374 }
2375
Guido van Rossum4c08d552000-03-10 22:55:18 +00002376 if (delobj != NULL) {
2377 if (PyString_Check(delobj)) {
2378 del_table = PyString_AS_STRING(delobj);
2379 dellen = PyString_GET_SIZE(delobj);
2380 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002381#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002382 else if (PyUnicode_Check(delobj)) {
2383 PyErr_SetString(PyExc_TypeError,
2384 "deletions are implemented differently for unicode");
2385 return NULL;
2386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002387#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002388 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2389 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002390 }
2391 else {
2392 del_table = NULL;
2393 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002394 }
2395
2396 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002397 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002398 result = PyString_FromStringAndSize((char *)NULL, inlen);
2399 if (result == NULL)
2400 return NULL;
2401 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002402 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002403
2404 if (dellen == 0) {
2405 /* If no deletions are required, use faster code */
2406 for (i = inlen; --i >= 0; ) {
2407 c = Py_CHARMASK(*input++);
2408 if (Py_CHARMASK((*output++ = table[c])) != c)
2409 changed = 1;
2410 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002411 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002412 return result;
2413 Py_DECREF(result);
2414 Py_INCREF(input_obj);
2415 return input_obj;
2416 }
2417
2418 for (i = 0; i < 256; i++)
2419 trans_table[i] = Py_CHARMASK(table[i]);
2420
2421 for (i = 0; i < dellen; i++)
2422 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2423
2424 for (i = inlen; --i >= 0; ) {
2425 c = Py_CHARMASK(*input++);
2426 if (trans_table[c] != -1)
2427 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2428 continue;
2429 changed = 1;
2430 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002431 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002432 Py_DECREF(result);
2433 Py_INCREF(input_obj);
2434 return input_obj;
2435 }
2436 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002437 if (inlen > 0)
2438 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002439 return result;
2440}
2441
2442
/* Direction selectors passed to findstring()/countstring(); those
   functions scan forward for a positive value and backward for a
   negative one. */
#define FORWARD 1
#define REVERSE -1

/* find and count characters and substrings */

/* Search the first target_len bytes of target for the byte c using
   memchr(); yields a char * to the first match or NULL if there is none. */
#define findchar(target, target_len, c) \
  ((char *)memchr((const void *)(target), c, target_len))
2450
2451/* String ops must return a string. */
2452/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002453Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002454return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002456 if (PyString_CheckExact(self)) {
2457 Py_INCREF(self);
2458 return self;
2459 }
2460 return (PyStringObject *)PyString_FromStringAndSize(
2461 PyString_AS_STRING(self),
2462 PyString_GET_SIZE(self));
2463}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002464
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002465Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002466countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002467{
2468 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002469 const char *start=target;
2470 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002471
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002472 while ( (start=findchar(start, end-start, c)) != NULL ) {
2473 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002474 if (count >= maxcount)
2475 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002476 start += 1;
2477 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002478 return count;
2479}
2480
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002481Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002482findstring(const char *target, Py_ssize_t target_len,
2483 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002484 Py_ssize_t start,
2485 Py_ssize_t end,
2486 int direction)
2487{
2488 if (start < 0) {
2489 start += target_len;
2490 if (start < 0)
2491 start = 0;
2492 }
2493 if (end > target_len) {
2494 end = target_len;
2495 } else if (end < 0) {
2496 end += target_len;
2497 if (end < 0)
2498 end = 0;
2499 }
2500
2501 /* zero-length substrings always match at the first attempt */
2502 if (pattern_len == 0)
2503 return (direction > 0) ? start : end;
2504
2505 end -= pattern_len;
2506
2507 if (direction < 0) {
2508 for (; end >= start; end--)
2509 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2510 return end;
2511 } else {
2512 for (; start <= end; start++)
2513 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2514 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002515 }
2516 return -1;
2517}
2518
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002519Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002520countstring(const char *target, Py_ssize_t target_len,
2521 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002522 Py_ssize_t start,
2523 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002524 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002525{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002526 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002527
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002528 if (start < 0) {
2529 start += target_len;
2530 if (start < 0)
2531 start = 0;
2532 }
2533 if (end > target_len) {
2534 end = target_len;
2535 } else if (end < 0) {
2536 end += target_len;
2537 if (end < 0)
2538 end = 0;
2539 }
2540
2541 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002542 if (pattern_len == 0 || maxcount == 0) {
2543 if (target_len+1 < maxcount)
2544 return target_len+1;
2545 return maxcount;
2546 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002547
2548 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002549 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002550 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002551 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2552 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002553 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002554 end -= pattern_len-1;
2555 }
2556 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002557 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002558 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2559 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002560 if (--maxcount <= 0)
2561 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002562 start += pattern_len-1;
2563 }
2564 }
2565 return count;
2566}
2567
2568
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002569/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002570
2571/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002572Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002573replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002574 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002575 Py_ssize_t maxcount)
2576{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002577 char *self_s, *result_s;
2578 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002579 Py_ssize_t count, i, product;
2580 PyStringObject *result;
2581
2582 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002583
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002584 /* 1 at the end plus 1 after every character */
2585 count = self_len+1;
2586 if (maxcount < count)
2587 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002588
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002589 /* Check for overflow */
2590 /* result_len = count * to_len + self_len; */
2591 product = count * to_len;
2592 if (product / to_len != count) {
2593 PyErr_SetString(PyExc_OverflowError,
2594 "replace string is too long");
2595 return NULL;
2596 }
2597 result_len = product + self_len;
2598 if (result_len < 0) {
2599 PyErr_SetString(PyExc_OverflowError,
2600 "replace string is too long");
2601 return NULL;
2602 }
2603
2604 if (! (result = (PyStringObject *)
2605 PyString_FromStringAndSize(NULL, result_len)) )
2606 return NULL;
2607
2608 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002609 result_s = PyString_AS_STRING(result);
2610
2611 /* TODO: special case single character, which doesn't need memcpy */
2612
2613 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002614 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002615 result_s += to_len;
2616 count -= 1;
2617
2618 for (i=0; i<count; i++) {
2619 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002620 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002621 result_s += to_len;
2622 }
2623
2624 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002625 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002626
2627 return result;
2628}
2629
2630/* Special case for deleting a single character */
2631/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002632Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002633replace_delete_single_character(PyStringObject *self,
2634 char from_c, Py_ssize_t maxcount)
2635{
2636 char *self_s, *result_s;
2637 char *start, *next, *end;
2638 Py_ssize_t self_len, result_len;
2639 Py_ssize_t count;
2640 PyStringObject *result;
2641
2642 self_len = PyString_GET_SIZE(self);
2643 self_s = PyString_AS_STRING(self);
2644
Andrew Dalke51324072006-05-26 20:25:22 +00002645 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002646 if (count == 0) {
2647 return return_self(self);
2648 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002649
2650 result_len = self_len - count; /* from_len == 1 */
2651 assert(result_len>=0);
2652
2653 if ( (result = (PyStringObject *)
2654 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2655 return NULL;
2656 result_s = PyString_AS_STRING(result);
2657
2658 start = self_s;
2659 end = self_s + self_len;
2660 while (count-- > 0) {
2661 next = findchar(start, end-start, from_c);
2662 if (next == NULL)
2663 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002664 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002665 result_s += (next-start);
2666 start = next+1;
2667 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002668 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002669
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002670 return result;
2671}
2672
2673/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2674
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002675Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002676replace_delete_substring(PyStringObject *self,
2677 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002678 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002679 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002680 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002681 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002682 Py_ssize_t count, offset;
2683 PyStringObject *result;
2684
2685 self_len = PyString_GET_SIZE(self);
2686 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002687
2688 count = countstring(self_s, self_len,
2689 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002690 0, self_len, 1,
2691 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002692
2693 if (count == 0) {
2694 /* no matches */
2695 return return_self(self);
2696 }
2697
2698 result_len = self_len - (count * from_len);
2699 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002700
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002701 if ( (result = (PyStringObject *)
2702 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2703 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002704
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002705 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002706
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002707 start = self_s;
2708 end = self_s + self_len;
2709 while (count-- > 0) {
2710 offset = findstring(start, end-start,
2711 from_s, from_len,
2712 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002713 if (offset == -1)
2714 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002715 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002716
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002717 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002718
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002719 result_s += (next-start);
2720 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002721 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002722 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002723 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002724}
2725
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002726/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002727Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002728replace_single_character_in_place(PyStringObject *self,
2729 char from_c, char to_c,
2730 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002731{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002732 char *self_s, *result_s, *start, *end, *next;
2733 Py_ssize_t self_len;
2734 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002735
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002736 /* The result string will be the same size */
2737 self_s = PyString_AS_STRING(self);
2738 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002739
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002740 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002741
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002742 if (next == NULL) {
2743 /* No matches; return the original string */
2744 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002745 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002746
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002747 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002748 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002749 if (result == NULL)
2750 return NULL;
2751 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002752 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002753
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002754 /* change everything in-place, starting with this one */
2755 start = result_s + (next-self_s);
2756 *start = to_c;
2757 start++;
2758 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002759
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002760 while (--maxcount > 0) {
2761 next = findchar(start, end-start, from_c);
2762 if (next == NULL)
2763 break;
2764 *next = to_c;
2765 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002766 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002767
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002768 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002769}
2770
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002772Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002773replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002774 const char *from_s, Py_ssize_t from_len,
2775 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002776 Py_ssize_t maxcount)
2777{
2778 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002779 char *self_s;
2780 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002781 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002782
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002784
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785 self_s = PyString_AS_STRING(self);
2786 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002787
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788 offset = findstring(self_s, self_len,
2789 from_s, from_len,
2790 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002791 if (offset == -1) {
2792 /* No matches; return the original string */
2793 return return_self(self);
2794 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002795
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002796 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002797 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002798 if (result == NULL)
2799 return NULL;
2800 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002801 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002802
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002803 /* change everything in-place, starting with this one */
2804 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002805 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002806 start += from_len;
2807 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002808
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002809 while ( --maxcount > 0) {
2810 offset = findstring(start, end-start,
2811 from_s, from_len,
2812 0, end-start, FORWARD);
2813 if (offset==-1)
2814 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002815 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002816 start += offset+from_len;
2817 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002818
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002819 return result;
2820}
2821
2822/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002823Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002824replace_single_character(PyStringObject *self,
2825 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002826 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002827 Py_ssize_t maxcount)
2828{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002829 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002830 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002831 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002832 Py_ssize_t count, product;
2833 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002834
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002835 self_s = PyString_AS_STRING(self);
2836 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002837
Andrew Dalke51324072006-05-26 20:25:22 +00002838 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002839 if (count == 0) {
2840 /* no matches, return unchanged */
2841 return return_self(self);
2842 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002843
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002844 /* use the difference between current and new, hence the "-1" */
2845 /* result_len = self_len + count * (to_len-1) */
2846 product = count * (to_len-1);
2847 if (product / (to_len-1) != count) {
2848 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2849 return NULL;
2850 }
2851 result_len = self_len + product;
2852 if (result_len < 0) {
2853 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2854 return NULL;
2855 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002856
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002857 if ( (result = (PyStringObject *)
2858 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2859 return NULL;
2860 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002861
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002862 start = self_s;
2863 end = self_s + self_len;
2864 while (count-- > 0) {
2865 next = findchar(start, end-start, from_c);
2866 if (next == NULL)
2867 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002868
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002869 if (next == start) {
2870 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002871 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002872 result_s += to_len;
2873 start += 1;
2874 } else {
2875 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002876 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002877 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002878 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002879 result_s += to_len;
2880 start = next+1;
2881 }
2882 }
2883 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002884 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002885
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002886 return result;
2887}
2888
2889/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002890Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002891replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002892 const char *from_s, Py_ssize_t from_len,
2893 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002894 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002895 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002896 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002897 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898 Py_ssize_t count, offset, product;
2899 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002900
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901 self_s = PyString_AS_STRING(self);
2902 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002903
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002904 count = countstring(self_s, self_len,
2905 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002906 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002907 if (count == 0) {
2908 /* no matches, return unchanged */
2909 return return_self(self);
2910 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002911
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002912 /* Check for overflow */
2913 /* result_len = self_len + count * (to_len-from_len) */
2914 product = count * (to_len-from_len);
2915 if (product / (to_len-from_len) != count) {
2916 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2917 return NULL;
2918 }
2919 result_len = self_len + product;
2920 if (result_len < 0) {
2921 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2922 return NULL;
2923 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002924
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002925 if ( (result = (PyStringObject *)
2926 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2927 return NULL;
2928 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002929
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002930 start = self_s;
2931 end = self_s + self_len;
2932 while (count-- > 0) {
2933 offset = findstring(start, end-start,
2934 from_s, from_len,
2935 0, end-start, FORWARD);
2936 if (offset == -1)
2937 break;
2938 next = start+offset;
2939 if (next == start) {
2940 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002941 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002942 result_s += to_len;
2943 start += from_len;
2944 } else {
2945 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002946 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002947 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002948 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002949 result_s += to_len;
2950 start = next+from_len;
2951 }
2952 }
2953 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002954 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002955
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002956 return result;
2957}
2958
2959
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002960Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002961replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002962 const char *from_s, Py_ssize_t from_len,
2963 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002964 Py_ssize_t maxcount)
2965{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002966 if (maxcount < 0) {
2967 maxcount = PY_SSIZE_T_MAX;
2968 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2969 /* nothing to do; return the original string */
2970 return return_self(self);
2971 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002972
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973 if (maxcount == 0 ||
2974 (from_len == 0 && to_len == 0)) {
2975 /* nothing to do; return the original string */
2976 return return_self(self);
2977 }
2978
2979 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002980
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002981 if (from_len == 0) {
2982 /* insert the 'to' string everywhere. */
2983 /* >>> "Python".replace("", ".") */
2984 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002985 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002986 }
2987
2988 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2989 /* point for an empty self string to generate a non-empty string */
2990 /* Special case so the remaining code always gets a non-empty string */
2991 if (PyString_GET_SIZE(self) == 0) {
2992 return return_self(self);
2993 }
2994
2995 if (to_len == 0) {
2996 /* delete all occurances of 'from' string */
2997 if (from_len == 1) {
2998 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002999 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003000 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003001 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003002 }
3003 }
3004
3005 /* Handle special case where both strings have the same length */
3006
3007 if (from_len == to_len) {
3008 if (from_len == 1) {
3009 return replace_single_character_in_place(
3010 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003011 from_s[0],
3012 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003013 maxcount);
3014 } else {
3015 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003016 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003017 }
3018 }
3019
3020 /* Otherwise use the more generic algorithms */
3021 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003022 return replace_single_character(self, from_s[0],
3023 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003024 } else {
3025 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003026 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003027 }
3028}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003029
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003030PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003031"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003032\n\
3033Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003034old replaced by new. If the optional argument count is\n\
3035given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003036
3037static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003038string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003039{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003040 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003041 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003042 const char *from_s, *to_s;
3043 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003044
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003045 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003046 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003047
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003048 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003049 from_s = PyString_AS_STRING(from);
3050 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003051 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003052#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003053 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003054 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003055 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003056#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003057 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003058 return NULL;
3059
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003060 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003061 to_s = PyString_AS_STRING(to);
3062 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003063 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003064#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003065 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003066 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003067 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003068#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003069 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003070 return NULL;
3071
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003072 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003073 from_s, from_len,
3074 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003075}
3076
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003078
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003079/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003080 * against substr, using the start and end arguments. Returns
3081 * -1 on error, 0 if not found and 1 if found.
3082 */
3083Py_LOCAL(int)
3084_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3085 Py_ssize_t end, int direction)
3086{
3087 Py_ssize_t len = PyString_GET_SIZE(self);
3088 Py_ssize_t slen;
3089 const char* sub;
3090 const char* str;
3091
3092 if (PyString_Check(substr)) {
3093 sub = PyString_AS_STRING(substr);
3094 slen = PyString_GET_SIZE(substr);
3095 }
3096#ifdef Py_USING_UNICODE
3097 else if (PyUnicode_Check(substr))
3098 return PyUnicode_Tailmatch((PyObject *)self,
3099 substr, start, end, direction);
3100#endif
3101 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3102 return -1;
3103 str = PyString_AS_STRING(self);
3104
3105 string_adjust_indices(&start, &end, len);
3106
3107 if (direction < 0) {
3108 /* startswith */
3109 if (start+slen > len)
3110 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003111 } else {
3112 /* endswith */
3113 if (end-start < slen || start > len)
3114 return 0;
3115
3116 if (end-slen > start)
3117 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003118 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003119 if (end-start >= slen)
3120 return ! memcmp(str+start, sub, slen);
3121 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003122}
3123
3124
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003125PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003126"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003127\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003128Return True if S starts with the specified prefix, False otherwise.\n\
3129With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003130With optional end, stop comparing S at that position.\n\
3131prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003132
3133static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003134string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003135{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003136 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003137 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003138 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003139 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003140
Guido van Rossumc6821402000-05-08 14:08:05 +00003141 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3142 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003144 if (PyTuple_Check(subobj)) {
3145 Py_ssize_t i;
3146 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3147 result = _string_tailmatch(self,
3148 PyTuple_GET_ITEM(subobj, i),
3149 start, end, -1);
3150 if (result == -1)
3151 return NULL;
3152 else if (result) {
3153 Py_RETURN_TRUE;
3154 }
3155 }
3156 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157 }
Georg Brandl24250812006-06-09 18:45:48 +00003158 result = _string_tailmatch(self, subobj, start, end, -1);
3159 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003160 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003161 else
Georg Brandl24250812006-06-09 18:45:48 +00003162 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003163}
3164
3165
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003166PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003167"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003168\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003169Return True if S ends with the specified suffix, False otherwise.\n\
3170With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003171With optional end, stop comparing S at that position.\n\
3172suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003173
3174static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003175string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003176{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003177 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003178 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003179 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003180 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003181
Guido van Rossumc6821402000-05-08 14:08:05 +00003182 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3183 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003184 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003185 if (PyTuple_Check(subobj)) {
3186 Py_ssize_t i;
3187 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3188 result = _string_tailmatch(self,
3189 PyTuple_GET_ITEM(subobj, i),
3190 start, end, +1);
3191 if (result == -1)
3192 return NULL;
3193 else if (result) {
3194 Py_RETURN_TRUE;
3195 }
3196 }
3197 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003198 }
Georg Brandl24250812006-06-09 18:45:48 +00003199 result = _string_tailmatch(self, subobj, start, end, +1);
3200 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003201 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003202 else
Georg Brandl24250812006-06-09 18:45:48 +00003203 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003204}
3205
3206
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003207PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003208"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003209\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003210Encodes S using the codec registered for encoding. encoding defaults\n\
3211to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003212handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003213a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3214'xmlcharrefreplace' as well as any other name registered with\n\
3215codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003216
3217static PyObject *
3218string_encode(PyStringObject *self, PyObject *args)
3219{
3220 char *encoding = NULL;
3221 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003222 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003223
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003224 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3225 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003226 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003227 if (v == NULL)
3228 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003229 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3230 PyErr_Format(PyExc_TypeError,
3231 "encoder did not return a string/unicode object "
3232 "(type=%.400s)",
3233 v->ob_type->tp_name);
3234 Py_DECREF(v);
3235 return NULL;
3236 }
3237 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003238
3239 onError:
3240 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003241}
3242
3243
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003244PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003245"S.decode([encoding[,errors]]) -> object\n\
3246\n\
3247Decodes S using the codec registered for encoding. encoding defaults\n\
3248to the default encoding. errors may be given to set a different error\n\
3249handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003250a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3251as well as any other name registerd with codecs.register_error that is\n\
3252able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003253
3254static PyObject *
3255string_decode(PyStringObject *self, PyObject *args)
3256{
3257 char *encoding = NULL;
3258 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003259 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003260
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003261 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3262 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003263 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003264 if (v == NULL)
3265 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003266 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3267 PyErr_Format(PyExc_TypeError,
3268 "decoder did not return a string/unicode object "
3269 "(type=%.400s)",
3270 v->ob_type->tp_name);
3271 Py_DECREF(v);
3272 return NULL;
3273 }
3274 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003275
3276 onError:
3277 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003278}
3279
3280
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003281PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003282"S.expandtabs([tabsize]) -> string\n\
3283\n\
3284Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003285If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003286
3287static PyObject*
3288string_expandtabs(PyStringObject *self, PyObject *args)
3289{
3290 const char *e, *p;
3291 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003292 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003293 PyObject *u;
3294 int tabsize = 8;
3295
3296 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3297 return NULL;
3298
Thomas Wouters7e474022000-07-16 12:04:32 +00003299 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003300 i = j = 0;
3301 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3302 for (p = PyString_AS_STRING(self); p < e; p++)
3303 if (*p == '\t') {
3304 if (tabsize > 0)
3305 j += tabsize - (j % tabsize);
3306 }
3307 else {
3308 j++;
3309 if (*p == '\n' || *p == '\r') {
3310 i += j;
3311 j = 0;
3312 }
3313 }
3314
3315 /* Second pass: create output string and fill it */
3316 u = PyString_FromStringAndSize(NULL, i + j);
3317 if (!u)
3318 return NULL;
3319
3320 j = 0;
3321 q = PyString_AS_STRING(u);
3322
3323 for (p = PyString_AS_STRING(self); p < e; p++)
3324 if (*p == '\t') {
3325 if (tabsize > 0) {
3326 i = tabsize - (j % tabsize);
3327 j += i;
3328 while (i--)
3329 *q++ = ' ';
3330 }
3331 }
3332 else {
3333 j++;
3334 *q++ = *p;
3335 if (*p == '\n' || *p == '\r')
3336 j = 0;
3337 }
3338
3339 return u;
3340}
3341
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003342Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003343pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003344{
3345 PyObject *u;
3346
3347 if (left < 0)
3348 left = 0;
3349 if (right < 0)
3350 right = 0;
3351
Tim Peters8fa5dd02001-09-12 02:18:30 +00003352 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003353 Py_INCREF(self);
3354 return (PyObject *)self;
3355 }
3356
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003357 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003358 left + PyString_GET_SIZE(self) + right);
3359 if (u) {
3360 if (left)
3361 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003362 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003363 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003364 PyString_GET_SIZE(self));
3365 if (right)
3366 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3367 fill, right);
3368 }
3369
3370 return u;
3371}
3372
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003373PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003374"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003375"\n"
3376"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003377"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003378
3379static PyObject *
3380string_ljust(PyStringObject *self, PyObject *args)
3381{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003382 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003383 char fillchar = ' ';
3384
Thomas Wouters4abb3662006-04-19 14:50:15 +00003385 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003386 return NULL;
3387
Tim Peters8fa5dd02001-09-12 02:18:30 +00003388 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003389 Py_INCREF(self);
3390 return (PyObject*) self;
3391 }
3392
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003393 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003394}
3395
3396
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003397PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003398"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003399"\n"
3400"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003401"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003402
3403static PyObject *
3404string_rjust(PyStringObject *self, PyObject *args)
3405{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003406 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003407 char fillchar = ' ';
3408
Thomas Wouters4abb3662006-04-19 14:50:15 +00003409 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003410 return NULL;
3411
Tim Peters8fa5dd02001-09-12 02:18:30 +00003412 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003413 Py_INCREF(self);
3414 return (PyObject*) self;
3415 }
3416
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003417 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003418}
3419
3420
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003421PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003422"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003423"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003424"Return S centered in a string of length width. Padding is\n"
3425"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003426
3427static PyObject *
3428string_center(PyStringObject *self, PyObject *args)
3429{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003430 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003431 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003432 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003433
Thomas Wouters4abb3662006-04-19 14:50:15 +00003434 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003435 return NULL;
3436
Tim Peters8fa5dd02001-09-12 02:18:30 +00003437 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438 Py_INCREF(self);
3439 return (PyObject*) self;
3440 }
3441
3442 marg = width - PyString_GET_SIZE(self);
3443 left = marg / 2 + (marg & width & 1);
3444
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003445 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003446}
3447
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003448PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003449"S.zfill(width) -> string\n"
3450"\n"
3451"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003452"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003453
3454static PyObject *
3455string_zfill(PyStringObject *self, PyObject *args)
3456{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003457 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003458 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003459 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003460 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003461
Thomas Wouters4abb3662006-04-19 14:50:15 +00003462 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003463 return NULL;
3464
3465 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003466 if (PyString_CheckExact(self)) {
3467 Py_INCREF(self);
3468 return (PyObject*) self;
3469 }
3470 else
3471 return PyString_FromStringAndSize(
3472 PyString_AS_STRING(self),
3473 PyString_GET_SIZE(self)
3474 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003475 }
3476
3477 fill = width - PyString_GET_SIZE(self);
3478
3479 s = pad(self, fill, 0, '0');
3480
3481 if (s == NULL)
3482 return NULL;
3483
3484 p = PyString_AS_STRING(s);
3485 if (p[fill] == '+' || p[fill] == '-') {
3486 /* move sign to beginning of string */
3487 p[0] = p[fill];
3488 p[fill] = '0';
3489 }
3490
3491 return (PyObject*) s;
3492}
3493
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003494PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003495"S.isspace() -> bool\n\
3496\n\
3497Return True if all characters in S are whitespace\n\
3498and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003499
3500static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003501string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003502{
Fred Drakeba096332000-07-09 07:04:36 +00003503 register const unsigned char *p
3504 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003505 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003506
Guido van Rossum4c08d552000-03-10 22:55:18 +00003507 /* Shortcut for single character strings */
3508 if (PyString_GET_SIZE(self) == 1 &&
3509 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003510 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003511
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003512 /* Special case for empty strings */
3513 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003514 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003515
Guido van Rossum4c08d552000-03-10 22:55:18 +00003516 e = p + PyString_GET_SIZE(self);
3517 for (; p < e; p++) {
3518 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003519 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003520 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003521 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003522}
3523
3524
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003525PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003526"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003527\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003528Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003529and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003530
3531static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003532string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003533{
Fred Drakeba096332000-07-09 07:04:36 +00003534 register const unsigned char *p
3535 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003536 register const unsigned char *e;
3537
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003538 /* Shortcut for single character strings */
3539 if (PyString_GET_SIZE(self) == 1 &&
3540 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003541 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003542
3543 /* Special case for empty strings */
3544 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003545 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003546
3547 e = p + PyString_GET_SIZE(self);
3548 for (; p < e; p++) {
3549 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003550 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003551 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003552 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003553}
3554
3555
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003556PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003557"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003558\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003559Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003560and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003561
3562static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003563string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003564{
Fred Drakeba096332000-07-09 07:04:36 +00003565 register const unsigned char *p
3566 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003567 register const unsigned char *e;
3568
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003569 /* Shortcut for single character strings */
3570 if (PyString_GET_SIZE(self) == 1 &&
3571 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003572 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573
3574 /* Special case for empty strings */
3575 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003576 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003577
3578 e = p + PyString_GET_SIZE(self);
3579 for (; p < e; p++) {
3580 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003581 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003582 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003583 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003584}
3585
3586
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003587PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003588"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003589\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003590Return True if all characters in S are digits\n\
3591and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003592
3593static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003594string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003595{
Fred Drakeba096332000-07-09 07:04:36 +00003596 register const unsigned char *p
3597 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003598 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003599
Guido van Rossum4c08d552000-03-10 22:55:18 +00003600 /* Shortcut for single character strings */
3601 if (PyString_GET_SIZE(self) == 1 &&
3602 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003603 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003604
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003605 /* Special case for empty strings */
3606 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003607 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003608
Guido van Rossum4c08d552000-03-10 22:55:18 +00003609 e = p + PyString_GET_SIZE(self);
3610 for (; p < e; p++) {
3611 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003612 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003613 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003614 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003615}
3616
3617
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003618PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003619"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003621Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003622at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003623
3624static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003625string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003626{
Fred Drakeba096332000-07-09 07:04:36 +00003627 register const unsigned char *p
3628 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003629 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003630 int cased;
3631
Guido van Rossum4c08d552000-03-10 22:55:18 +00003632 /* Shortcut for single character strings */
3633 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003634 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003636 /* Special case for empty strings */
3637 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003638 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003639
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640 e = p + PyString_GET_SIZE(self);
3641 cased = 0;
3642 for (; p < e; p++) {
3643 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645 else if (!cased && islower(*p))
3646 cased = 1;
3647 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003649}
3650
3651
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003652PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003653"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003655Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003656at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003657
3658static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003659string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660{
Fred Drakeba096332000-07-09 07:04:36 +00003661 register const unsigned char *p
3662 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003663 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664 int cased;
3665
Guido van Rossum4c08d552000-03-10 22:55:18 +00003666 /* Shortcut for single character strings */
3667 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003670 /* Special case for empty strings */
3671 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003672 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003673
Guido van Rossum4c08d552000-03-10 22:55:18 +00003674 e = p + PyString_GET_SIZE(self);
3675 cased = 0;
3676 for (; p < e; p++) {
3677 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003678 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003679 else if (!cased && isupper(*p))
3680 cased = 1;
3681 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003682 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003683}
3684
3685
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003686PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003687"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003689Return True if S is a titlecased string and there is at least one\n\
3690character in S, i.e. uppercase characters may only follow uncased\n\
3691characters and lowercase characters only cased ones. Return False\n\
3692otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003693
3694static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003695string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003696{
Fred Drakeba096332000-07-09 07:04:36 +00003697 register const unsigned char *p
3698 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003699 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003700 int cased, previous_is_cased;
3701
Guido van Rossum4c08d552000-03-10 22:55:18 +00003702 /* Shortcut for single character strings */
3703 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003704 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003706 /* Special case for empty strings */
3707 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003708 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003709
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710 e = p + PyString_GET_SIZE(self);
3711 cased = 0;
3712 previous_is_cased = 0;
3713 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003714 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715
3716 if (isupper(ch)) {
3717 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719 previous_is_cased = 1;
3720 cased = 1;
3721 }
3722 else if (islower(ch)) {
3723 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003724 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725 previous_is_cased = 1;
3726 cased = 1;
3727 }
3728 else
3729 previous_is_cased = 0;
3730 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003731 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003732}
3733
3734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003735PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003736"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737\n\
3738Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003739Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003740is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741
Guido van Rossum4c08d552000-03-10 22:55:18 +00003742static PyObject*
3743string_splitlines(PyStringObject *self, PyObject *args)
3744{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003745 register Py_ssize_t i;
3746 register Py_ssize_t j;
3747 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003748 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003749 PyObject *list;
3750 PyObject *str;
3751 char *data;
3752
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003753 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003754 return NULL;
3755
3756 data = PyString_AS_STRING(self);
3757 len = PyString_GET_SIZE(self);
3758
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003759 /* This does not use the preallocated list because splitlines is
3760 usually run with hundreds of newlines. The overhead of
3761 switching between PyList_SET_ITEM and append causes about a
3762 2-3% slowdown for that common case. A smarter implementation
3763 could move the if check out, so the SET_ITEMs are done first
3764 and the appends only done when the prealloc buffer is full.
3765 That's too much work for little gain.*/
3766
Guido van Rossum4c08d552000-03-10 22:55:18 +00003767 list = PyList_New(0);
3768 if (!list)
3769 goto onError;
3770
3771 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003772 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003773
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774 /* Find a line and append it */
3775 while (i < len && data[i] != '\n' && data[i] != '\r')
3776 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003777
3778 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003779 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003780 if (i < len) {
3781 if (data[i] == '\r' && i + 1 < len &&
3782 data[i+1] == '\n')
3783 i += 2;
3784 else
3785 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003786 if (keepends)
3787 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003788 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003789 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003790 j = i;
3791 }
3792 if (j < len) {
3793 SPLIT_APPEND(data, j, len);
3794 }
3795
3796 return list;
3797
3798 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003799 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800 return NULL;
3801}
3802
3803#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003804#undef SPLIT_ADD
3805#undef MAX_PREALLOC
3806#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003807
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003808static PyObject *
3809string_getnewargs(PyStringObject *v)
3810{
3811 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3812}
3813
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003814
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003815static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003816string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003817 /* Counterparts of the obsolete stropmodule functions; except
3818 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003819 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3820 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003821 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003822 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3823 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003824 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3825 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3826 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3827 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3828 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3829 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3830 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003831 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3832 capitalize__doc__},
3833 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3834 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3835 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003836 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003837 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3838 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3839 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3840 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3841 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3842 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3843 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003844 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3845 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003846 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3847 startswith__doc__},
3848 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3849 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3850 swapcase__doc__},
3851 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3852 translate__doc__},
3853 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3854 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3855 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3856 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3857 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3858 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3859 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3860 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3861 expandtabs__doc__},
3862 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3863 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003864 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003865 {NULL, NULL} /* sentinel */
3866};
3867
Jeremy Hylton938ace62002-07-17 16:30:39 +00003868static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003869str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3870
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003871static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003872string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003873{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003874 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003875 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003876
Guido van Rossumae960af2001-08-30 03:11:59 +00003877 if (type != &PyString_Type)
3878 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003879 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3880 return NULL;
3881 if (x == NULL)
3882 return PyString_FromString("");
3883 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003884}
3885
Guido van Rossumae960af2001-08-30 03:11:59 +00003886static PyObject *
3887str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3888{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003889 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003890 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003891
3892 assert(PyType_IsSubtype(type, &PyString_Type));
3893 tmp = string_new(&PyString_Type, args, kwds);
3894 if (tmp == NULL)
3895 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003896 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003897 n = PyString_GET_SIZE(tmp);
3898 pnew = type->tp_alloc(type, n);
3899 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003900 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003901 ((PyStringObject *)pnew)->ob_shash =
3902 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003903 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003904 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003905 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003906 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003907}
3908
Guido van Rossumcacfc072002-05-24 19:01:59 +00003909static PyObject *
3910basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3911{
3912 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003913 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003914 return NULL;
3915}
3916
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003917static PyObject *
3918string_mod(PyObject *v, PyObject *w)
3919{
3920 if (!PyString_Check(v)) {
3921 Py_INCREF(Py_NotImplemented);
3922 return Py_NotImplemented;
3923 }
3924 return PyString_Format(v, w);
3925}
3926
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003927PyDoc_STRVAR(basestring_doc,
3928"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003929
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003930static PyNumberMethods string_as_number = {
3931 0, /*nb_add*/
3932 0, /*nb_subtract*/
3933 0, /*nb_multiply*/
3934 0, /*nb_divide*/
3935 string_mod, /*nb_remainder*/
3936};
3937
3938
Guido van Rossumcacfc072002-05-24 19:01:59 +00003939PyTypeObject PyBaseString_Type = {
3940 PyObject_HEAD_INIT(&PyType_Type)
3941 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003942 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003943 0,
3944 0,
3945 0, /* tp_dealloc */
3946 0, /* tp_print */
3947 0, /* tp_getattr */
3948 0, /* tp_setattr */
3949 0, /* tp_compare */
3950 0, /* tp_repr */
3951 0, /* tp_as_number */
3952 0, /* tp_as_sequence */
3953 0, /* tp_as_mapping */
3954 0, /* tp_hash */
3955 0, /* tp_call */
3956 0, /* tp_str */
3957 0, /* tp_getattro */
3958 0, /* tp_setattro */
3959 0, /* tp_as_buffer */
3960 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3961 basestring_doc, /* tp_doc */
3962 0, /* tp_traverse */
3963 0, /* tp_clear */
3964 0, /* tp_richcompare */
3965 0, /* tp_weaklistoffset */
3966 0, /* tp_iter */
3967 0, /* tp_iternext */
3968 0, /* tp_methods */
3969 0, /* tp_members */
3970 0, /* tp_getset */
3971 &PyBaseObject_Type, /* tp_base */
3972 0, /* tp_dict */
3973 0, /* tp_descr_get */
3974 0, /* tp_descr_set */
3975 0, /* tp_dictoffset */
3976 0, /* tp_init */
3977 0, /* tp_alloc */
3978 basestring_new, /* tp_new */
3979 0, /* tp_free */
3980};
3981
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003982PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003983"str(object) -> string\n\
3984\n\
3985Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003986If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003987
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003988PyTypeObject PyString_Type = {
3989 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003990 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003991 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003992 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003993 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00003994 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003995 (printfunc)string_print, /* tp_print */
3996 0, /* tp_getattr */
3997 0, /* tp_setattr */
3998 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00003999 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004000 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004001 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004002 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004003 (hashfunc)string_hash, /* tp_hash */
4004 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004005 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004006 PyObject_GenericGetAttr, /* tp_getattro */
4007 0, /* tp_setattro */
4008 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004009 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004010 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004011 string_doc, /* tp_doc */
4012 0, /* tp_traverse */
4013 0, /* tp_clear */
4014 (richcmpfunc)string_richcompare, /* tp_richcompare */
4015 0, /* tp_weaklistoffset */
4016 0, /* tp_iter */
4017 0, /* tp_iternext */
4018 string_methods, /* tp_methods */
4019 0, /* tp_members */
4020 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004021 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004022 0, /* tp_dict */
4023 0, /* tp_descr_get */
4024 0, /* tp_descr_set */
4025 0, /* tp_dictoffset */
4026 0, /* tp_init */
4027 0, /* tp_alloc */
4028 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004029 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004030};
4031
4032void
Fred Drakeba096332000-07-09 07:04:36 +00004033PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004034{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004035 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004036 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004037 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004038 if (w == NULL || !PyString_Check(*pv)) {
4039 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004040 *pv = NULL;
4041 return;
4042 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004043 v = string_concat((PyStringObject *) *pv, w);
4044 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004045 *pv = v;
4046}
4047
Guido van Rossum013142a1994-08-30 08:19:36 +00004048void
Fred Drakeba096332000-07-09 07:04:36 +00004049PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004050{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004051 PyString_Concat(pv, w);
4052 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004053}
4054
4055
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004056/* The following function breaks the notion that strings are immutable:
4057 it changes the size of a string. We get away with this only if there
4058 is only one module referencing the object. You can also think of it
4059 as creating a new string object and destroying the old one, only
4060 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004061 already be known to some other part of the code...
4062 Note that if there's not enough memory to resize the string, the original
4063 string object at *pv is deallocated, *pv is set to NULL, an "out of
4064 memory" exception is set, and -1 is returned. Else (on success) 0 is
4065 returned, and the value in *pv may or may not be the same as on input.
4066 As always, an extra byte is allocated for a trailing \0 byte (newsize
4067 does *not* include that), and a trailing \0 byte is stored.
4068*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004069
4070int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004071_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004072{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004073 register PyObject *v;
4074 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004075 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004076 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4077 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004078 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 Py_DECREF(v);
4080 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004081 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004082 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004083 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004084 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004085 _Py_ForgetReference(v);
4086 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004087 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004088 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004089 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004090 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004091 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004092 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004093 _Py_NewReference(*pv);
4094 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004095 sv->ob_size = newsize;
4096 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004097 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004098 return 0;
4099}
Guido van Rossume5372401993-03-16 12:15:04 +00004100
4101/* Helpers for formatstring */
4102
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004103Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004104getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004105{
Thomas Wouters977485d2006-02-16 15:59:12 +00004106 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004107 if (argidx < arglen) {
4108 (*p_argidx)++;
4109 if (arglen < 0)
4110 return args;
4111 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004112 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004113 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004114 PyErr_SetString(PyExc_TypeError,
4115 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004116 return NULL;
4117}
4118
/* Format flags: one bit per flag character that may follow the '%' of a
 * format specifier.  They are OR-ed together into a single bitmask.
 *
 *      F_LJUST '-'
 *      F_SIGN  '+'
 *      F_BLANK ' '
 *      F_ALT   '#'
 *      F_ZERO  '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
4131
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004132Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004133formatfloat(char *buf, size_t buflen, int flags,
4134 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004135{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004136 /* fmt = '%#.' + `prec` + `type`
4137 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004138 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004139 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004140 x = PyFloat_AsDouble(v);
4141 if (x == -1.0 && PyErr_Occurred()) {
4142 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004143 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004144 }
Guido van Rossume5372401993-03-16 12:15:04 +00004145 if (prec < 0)
4146 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004147 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4148 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004149 /* Worst case length calc to ensure no buffer overrun:
4150
4151 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004152 fmt = %#.<prec>g
4153 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004154 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004155 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004156
4157 'f' formats:
4158 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4159 len = 1 + 50 + 1 + prec = 52 + prec
4160
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004161 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004162 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004163
4164 */
4165 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4166 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004167 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004168 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004169 return -1;
4170 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004171 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4172 (flags&F_ALT) ? "#" : "",
4173 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004174 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004175 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004176}
4177
Tim Peters38fd5b62000-09-21 05:43:11 +00004178/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4179 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4180 * Python's regular ints.
4181 * Return value: a new PyString*, or NULL if error.
4182 * . *pbuf is set to point into it,
4183 * *plen set to the # of chars following that.
4184 * Caller must decref it when done using pbuf.
4185 * The string starting at *pbuf is of the form
4186 * "-"? ("0x" | "0X")? digit+
4187 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004188 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004189 * There will be at least prec digits, zero-filled on the left if
4190 * necessary to get that many.
4191 * val object to be converted
4192 * flags bitmask of format flags; only F_ALT is looked at
4193 * prec minimum number of digits; 0-fill on left if needed
4194 * type a character in [duoxX]; u acts the same as d
4195 *
4196 * CAUTION: o, x and X conversions on regular ints can never
4197 * produce a '-' sign, but can for Python's unbounded ints.
4198 */
4199PyObject*
4200_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4201 char **pbuf, int *plen)
4202{
4203 PyObject *result = NULL;
4204 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004205 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004206 int sign; /* 1 if '-', else 0 */
4207 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004208 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004209 int numdigits; /* len == numnondigits + numdigits */
4210 int numnondigits = 0;
4211
4212 switch (type) {
4213 case 'd':
4214 case 'u':
4215 result = val->ob_type->tp_str(val);
4216 break;
4217 case 'o':
4218 result = val->ob_type->tp_as_number->nb_oct(val);
4219 break;
4220 case 'x':
4221 case 'X':
4222 numnondigits = 2;
4223 result = val->ob_type->tp_as_number->nb_hex(val);
4224 break;
4225 default:
4226 assert(!"'type' not in [duoxX]");
4227 }
4228 if (!result)
4229 return NULL;
4230
4231 /* To modify the string in-place, there can only be one reference. */
4232 if (result->ob_refcnt != 1) {
4233 PyErr_BadInternalCall();
4234 return NULL;
4235 }
4236 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00004237 llen = PyString_Size(result);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00004238 if (llen > PY_SSIZE_T_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004239 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4240 return NULL;
4241 }
4242 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004243 if (buf[len-1] == 'L') {
4244 --len;
4245 buf[len] = '\0';
4246 }
4247 sign = buf[0] == '-';
4248 numnondigits += sign;
4249 numdigits = len - numnondigits;
4250 assert(numdigits > 0);
4251
Tim Petersfff53252001-04-12 18:38:48 +00004252 /* Get rid of base marker unless F_ALT */
4253 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004254 /* Need to skip 0x, 0X or 0. */
4255 int skipped = 0;
4256 switch (type) {
4257 case 'o':
4258 assert(buf[sign] == '0');
4259 /* If 0 is only digit, leave it alone. */
4260 if (numdigits > 1) {
4261 skipped = 1;
4262 --numdigits;
4263 }
4264 break;
4265 case 'x':
4266 case 'X':
4267 assert(buf[sign] == '0');
4268 assert(buf[sign + 1] == 'x');
4269 skipped = 2;
4270 numnondigits -= 2;
4271 break;
4272 }
4273 if (skipped) {
4274 buf += skipped;
4275 len -= skipped;
4276 if (sign)
4277 buf[0] = '-';
4278 }
4279 assert(len == numnondigits + numdigits);
4280 assert(numdigits > 0);
4281 }
4282
4283 /* Fill with leading zeroes to meet minimum width. */
4284 if (prec > numdigits) {
4285 PyObject *r1 = PyString_FromStringAndSize(NULL,
4286 numnondigits + prec);
4287 char *b1;
4288 if (!r1) {
4289 Py_DECREF(result);
4290 return NULL;
4291 }
4292 b1 = PyString_AS_STRING(r1);
4293 for (i = 0; i < numnondigits; ++i)
4294 *b1++ = *buf++;
4295 for (i = 0; i < prec - numdigits; i++)
4296 *b1++ = '0';
4297 for (i = 0; i < numdigits; i++)
4298 *b1++ = *buf++;
4299 *b1 = '\0';
4300 Py_DECREF(result);
4301 result = r1;
4302 buf = PyString_AS_STRING(result);
4303 len = numnondigits + prec;
4304 }
4305
4306 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004307 if (type == 'X') {
4308 /* Need to convert all lower case letters to upper case.
4309 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004310 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004311 if (buf[i] >= 'a' && buf[i] <= 'x')
4312 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004313 }
4314 *pbuf = buf;
4315 *plen = len;
4316 return result;
4317}
4318
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004319Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004320formatint(char *buf, size_t buflen, int flags,
4321 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004322{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004323 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004324 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4325 + 1 + 1 = 24 */
4326 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004327 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004328 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004329
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004330 x = PyInt_AsLong(v);
4331 if (x == -1 && PyErr_Occurred()) {
4332 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004333 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004334 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004335 if (x < 0 && type == 'u') {
4336 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004337 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004338 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4339 sign = "-";
4340 else
4341 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004342 if (prec < 0)
4343 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004344
4345 if ((flags & F_ALT) &&
4346 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004347 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004348 * of issues that cause pain:
4349 * - when 0 is being converted, the C standard leaves off
4350 * the '0x' or '0X', which is inconsistent with other
4351 * %#x/%#X conversions and inconsistent with Python's
4352 * hex() function
4353 * - there are platforms that violate the standard and
4354 * convert 0 with the '0x' or '0X'
4355 * (Metrowerks, Compaq Tru64)
4356 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004357 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004358 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004359 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004360 * We can achieve the desired consistency by inserting our
4361 * own '0x' or '0X' prefix, and substituting %x/%X in place
4362 * of %#x/%#X.
4363 *
4364 * Note that this is the same approach as used in
4365 * formatint() in unicodeobject.c
4366 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004367 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4368 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004369 }
4370 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004371 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4372 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004373 prec, type);
4374 }
4375
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004376 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4377 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004378 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004379 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004380 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004381 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004382 return -1;
4383 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004384 if (sign[0])
4385 PyOS_snprintf(buf, buflen, fmt, -x);
4386 else
4387 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004388 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004389}
4390
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004391Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004392formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004393{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004394 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004395 if (PyString_Check(v)) {
4396 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004397 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004398 }
4399 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004400 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004401 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004402 }
4403 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004404 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004405}
4406
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever the macro is used (e.g. after sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00004416
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004417PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004418PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004419{
4420 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004421 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004422 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004423 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004424 PyObject *result, *orig_args;
4425#ifdef Py_USING_UNICODE
4426 PyObject *v, *w;
4427#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004428 PyObject *dict = NULL;
4429 if (format == NULL || !PyString_Check(format) || args == NULL) {
4430 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004431 return NULL;
4432 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004433 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004434 fmt = PyString_AS_STRING(format);
4435 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004436 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004437 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004438 if (result == NULL)
4439 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004440 res = PyString_AsString(result);
4441 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004442 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004443 argidx = 0;
4444 }
4445 else {
4446 arglen = -1;
4447 argidx = -2;
4448 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004449 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4450 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004451 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004452 while (--fmtcnt >= 0) {
4453 if (*fmt != '%') {
4454 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004455 rescnt = fmtcnt + 100;
4456 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004457 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004458 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004459 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004460 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004461 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004462 }
4463 *res++ = *fmt++;
4464 }
4465 else {
4466 /* Got a format specifier */
4467 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004468 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004469 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004470 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004471 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004472 PyObject *v = NULL;
4473 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004474 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004475 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004476 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004477 char formatbuf[FORMATBUFLEN];
4478 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004479#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004480 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004481 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004482#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004483
Guido van Rossumda9c2711996-12-05 21:58:58 +00004484 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004485 if (*fmt == '(') {
4486 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004487 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004488 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004489 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004490
4491 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004492 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004493 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004494 goto error;
4495 }
4496 ++fmt;
4497 --fmtcnt;
4498 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004499 /* Skip over balanced parentheses */
4500 while (pcount > 0 && --fmtcnt >= 0) {
4501 if (*fmt == ')')
4502 --pcount;
4503 else if (*fmt == '(')
4504 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004505 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004506 }
4507 keylen = fmt - keystart - 1;
4508 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004509 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004510 "incomplete format key");
4511 goto error;
4512 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004513 key = PyString_FromStringAndSize(keystart,
4514 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004515 if (key == NULL)
4516 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004517 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004518 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004519 args_owned = 0;
4520 }
4521 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004522 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004523 if (args == NULL) {
4524 goto error;
4525 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004526 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004527 arglen = -1;
4528 argidx = -2;
4529 }
Guido van Rossume5372401993-03-16 12:15:04 +00004530 while (--fmtcnt >= 0) {
4531 switch (c = *fmt++) {
4532 case '-': flags |= F_LJUST; continue;
4533 case '+': flags |= F_SIGN; continue;
4534 case ' ': flags |= F_BLANK; continue;
4535 case '#': flags |= F_ALT; continue;
4536 case '0': flags |= F_ZERO; continue;
4537 }
4538 break;
4539 }
4540 if (c == '*') {
4541 v = getnextarg(args, arglen, &argidx);
4542 if (v == NULL)
4543 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004544 if (!PyInt_Check(v)) {
4545 PyErr_SetString(PyExc_TypeError,
4546 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004547 goto error;
4548 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004549 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004550 if (width < 0) {
4551 flags |= F_LJUST;
4552 width = -width;
4553 }
Guido van Rossume5372401993-03-16 12:15:04 +00004554 if (--fmtcnt >= 0)
4555 c = *fmt++;
4556 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004557 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004558 width = c - '0';
4559 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004560 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004561 if (!isdigit(c))
4562 break;
4563 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004564 PyErr_SetString(
4565 PyExc_ValueError,
4566 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004567 goto error;
4568 }
4569 width = width*10 + (c - '0');
4570 }
4571 }
4572 if (c == '.') {
4573 prec = 0;
4574 if (--fmtcnt >= 0)
4575 c = *fmt++;
4576 if (c == '*') {
4577 v = getnextarg(args, arglen, &argidx);
4578 if (v == NULL)
4579 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004580 if (!PyInt_Check(v)) {
4581 PyErr_SetString(
4582 PyExc_TypeError,
4583 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004584 goto error;
4585 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004586 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004587 if (prec < 0)
4588 prec = 0;
4589 if (--fmtcnt >= 0)
4590 c = *fmt++;
4591 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004592 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004593 prec = c - '0';
4594 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004595 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004596 if (!isdigit(c))
4597 break;
4598 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004599 PyErr_SetString(
4600 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004601 "prec too big");
4602 goto error;
4603 }
4604 prec = prec*10 + (c - '0');
4605 }
4606 }
4607 } /* prec */
4608 if (fmtcnt >= 0) {
4609 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004610 if (--fmtcnt >= 0)
4611 c = *fmt++;
4612 }
4613 }
4614 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004615 PyErr_SetString(PyExc_ValueError,
4616 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004617 goto error;
4618 }
4619 if (c != '%') {
4620 v = getnextarg(args, arglen, &argidx);
4621 if (v == NULL)
4622 goto error;
4623 }
4624 sign = 0;
4625 fill = ' ';
4626 switch (c) {
4627 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004628 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004629 len = 1;
4630 break;
4631 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004632#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004633 if (PyUnicode_Check(v)) {
4634 fmt = fmt_start;
4635 argidx = argidx_start;
4636 goto unicode;
4637 }
Georg Brandld45014b2005-10-01 17:06:00 +00004638#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004639 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004640#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004641 if (temp != NULL && PyUnicode_Check(temp)) {
4642 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004643 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004644 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004645 goto unicode;
4646 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004647#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004648 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004649 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004650 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004651 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004652 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004653 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004654 if (!PyString_Check(temp)) {
4655 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004656 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004657 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004658 goto error;
4659 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004660 pbuf = PyString_AS_STRING(temp);
4661 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004662 if (prec >= 0 && len > prec)
4663 len = prec;
4664 break;
4665 case 'i':
4666 case 'd':
4667 case 'u':
4668 case 'o':
4669 case 'x':
4670 case 'X':
4671 if (c == 'i')
4672 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004673 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004674 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004675 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004676 prec, c, &pbuf, &ilen);
4677 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004678 if (!temp)
4679 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004680 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004681 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004682 else {
4683 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004684 len = formatint(pbuf,
4685 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004686 flags, prec, c, v);
4687 if (len < 0)
4688 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004689 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004690 }
4691 if (flags & F_ZERO)
4692 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004693 break;
4694 case 'e':
4695 case 'E':
4696 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004697 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004698 case 'g':
4699 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004700 if (c == 'F')
4701 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004702 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004703 len = formatfloat(pbuf, sizeof(formatbuf),
4704 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004705 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004706 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004707 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004708 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004709 fill = '0';
4710 break;
4711 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004712#ifdef Py_USING_UNICODE
4713 if (PyUnicode_Check(v)) {
4714 fmt = fmt_start;
4715 argidx = argidx_start;
4716 goto unicode;
4717 }
4718#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004719 pbuf = formatbuf;
4720 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004721 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004722 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004723 break;
4724 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004725 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004726 "unsupported format character '%c' (0x%x) "
4727 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004728 c, c,
4729 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004730 goto error;
4731 }
4732 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004733 if (*pbuf == '-' || *pbuf == '+') {
4734 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004735 len--;
4736 }
4737 else if (flags & F_SIGN)
4738 sign = '+';
4739 else if (flags & F_BLANK)
4740 sign = ' ';
4741 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004742 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004743 }
4744 if (width < len)
4745 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004746 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004747 reslen -= rescnt;
4748 rescnt = width + fmtcnt + 100;
4749 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004750 if (reslen < 0) {
4751 Py_DECREF(result);
4752 return PyErr_NoMemory();
4753 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004754 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004755 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004756 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004757 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004758 }
4759 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004760 if (fill != ' ')
4761 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004762 rescnt--;
4763 if (width > len)
4764 width--;
4765 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004766 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4767 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004768 assert(pbuf[1] == c);
4769 if (fill != ' ') {
4770 *res++ = *pbuf++;
4771 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004772 }
Tim Petersfff53252001-04-12 18:38:48 +00004773 rescnt -= 2;
4774 width -= 2;
4775 if (width < 0)
4776 width = 0;
4777 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004778 }
4779 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004780 do {
4781 --rescnt;
4782 *res++ = fill;
4783 } while (--width > len);
4784 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004785 if (fill == ' ') {
4786 if (sign)
4787 *res++ = sign;
4788 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004789 (c == 'x' || c == 'X')) {
4790 assert(pbuf[0] == '0');
4791 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004792 *res++ = *pbuf++;
4793 *res++ = *pbuf++;
4794 }
4795 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004796 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004797 res += len;
4798 rescnt -= len;
4799 while (--width >= len) {
4800 --rescnt;
4801 *res++ = ' ';
4802 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004803 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004804 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004805 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004806 goto error;
4807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004808 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004809 } /* '%' */
4810 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004811 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004812 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004813 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004814 goto error;
4815 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004816 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004817 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004818 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004819 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004820 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004821
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004822#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004823 unicode:
4824 if (args_owned) {
4825 Py_DECREF(args);
4826 args_owned = 0;
4827 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004828 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004829 if (PyTuple_Check(orig_args) && argidx > 0) {
4830 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004831 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004832 v = PyTuple_New(n);
4833 if (v == NULL)
4834 goto error;
4835 while (--n >= 0) {
4836 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4837 Py_INCREF(w);
4838 PyTuple_SET_ITEM(v, n, w);
4839 }
4840 args = v;
4841 } else {
4842 Py_INCREF(orig_args);
4843 args = orig_args;
4844 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004845 args_owned = 1;
4846 /* Take what we have of the result and let the Unicode formatting
4847 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004848 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004849 if (_PyString_Resize(&result, rescnt))
4850 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004851 fmtcnt = PyString_GET_SIZE(format) - \
4852 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004853 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4854 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004855 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004856 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004857 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004858 if (v == NULL)
4859 goto error;
4860 /* Paste what we have (result) to what the Unicode formatting
4861 function returned (v) and return the result (or error) */
4862 w = PyUnicode_Concat(result, v);
4863 Py_DECREF(result);
4864 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004865 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004866 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004867#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004868
Guido van Rossume5372401993-03-16 12:15:04 +00004869 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004870 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004871 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004872 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004873 }
Guido van Rossume5372401993-03-16 12:15:04 +00004874 return NULL;
4875}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004876
Guido van Rossum2a61e741997-01-18 07:55:05 +00004877void
Fred Drakeba096332000-07-09 07:04:36 +00004878PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004879{
4880 register PyStringObject *s = (PyStringObject *)(*p);
4881 PyObject *t;
4882 if (s == NULL || !PyString_Check(s))
4883 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004884 /* If it's a string subclass, we don't really know what putting
4885 it in the interned dict might do. */
4886 if (!PyString_CheckExact(s))
4887 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004888 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004889 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004890 if (interned == NULL) {
4891 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004892 if (interned == NULL) {
4893 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004894 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004895 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004896 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004897 t = PyDict_GetItem(interned, (PyObject *)s);
4898 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004899 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004900 Py_DECREF(*p);
4901 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004902 return;
4903 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004904
Armin Rigo79f7ad22004-08-07 19:27:39 +00004905 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004906 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004907 return;
4908 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004909 /* The two references in interned are not counted by refcnt.
4910 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004911 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004912 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004913}
4914
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004915void
4916PyString_InternImmortal(PyObject **p)
4917{
4918 PyString_InternInPlace(p);
4919 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4920 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4921 Py_INCREF(*p);
4922 }
4923}
4924
Guido van Rossum2a61e741997-01-18 07:55:05 +00004925
4926PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004927PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004928{
4929 PyObject *s = PyString_FromString(cp);
4930 if (s == NULL)
4931 return NULL;
4932 PyString_InternInPlace(&s);
4933 return s;
4934}
4935
Guido van Rossum8cf04761997-08-02 02:57:45 +00004936void
Fred Drakeba096332000-07-09 07:04:36 +00004937PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004938{
4939 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004940 for (i = 0; i < UCHAR_MAX + 1; i++) {
4941 Py_XDECREF(characters[i]);
4942 characters[i] = NULL;
4943 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004944 Py_XDECREF(nullstring);
4945 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004946}
Barry Warsawa903ad982001-02-23 16:40:48 +00004947
Barry Warsawa903ad982001-02-23 16:40:48 +00004948void _Py_ReleaseInternedStrings(void)
4949{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004950 PyObject *keys;
4951 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004952 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004953
4954 if (interned == NULL || !PyDict_Check(interned))
4955 return;
4956 keys = PyDict_Keys(interned);
4957 if (keys == NULL || !PyList_Check(keys)) {
4958 PyErr_Clear();
4959 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004960 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004961
4962 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4963 detector, interned strings are not forcibly deallocated; rather, we
4964 give them their stolen references back, and then clear and DECREF
4965 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004966
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004967 fprintf(stderr, "releasing interned strings\n");
4968 n = PyList_GET_SIZE(keys);
4969 for (i = 0; i < n; i++) {
4970 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4971 switch (s->ob_sstate) {
4972 case SSTATE_NOT_INTERNED:
4973 /* XXX Shouldn't happen */
4974 break;
4975 case SSTATE_INTERNED_IMMORTAL:
4976 s->ob_refcnt += 1;
4977 break;
4978 case SSTATE_INTERNED_MORTAL:
4979 s->ob_refcnt += 2;
4980 break;
4981 default:
4982 Py_FatalError("Inconsistent interned string state.");
4983 }
4984 s->ob_sstate = SSTATE_NOT_INTERNED;
4985 }
4986 Py_DECREF(keys);
4987 PyDict_Clear(interned);
4988 Py_DECREF(interned);
4989 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004990}