blob: 7cd613dd87292e05c319c0346a547f0260d078b3 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossum45ec02a2002-08-19 21:43:18 +000016/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
Tim Petersae1d0c92006-03-17 03:29:34 +000021 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000022 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000026/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000027 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000029 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000030
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000031 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000032 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000033
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000034 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000044
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000045 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Gregory P. Smith14acde32008-04-09 23:41:13 +000057 if (size < 0) {
58 PyErr_SetString(PyExc_SystemError,
59 "Negative size passed to PyString_FromStringAndSize");
60 return NULL;
61 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 if (size == 0 && (op = nullstring) != NULL) {
63#ifdef COUNT_ALLOCS
64 null_strings++;
65#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 Py_INCREF(op);
67 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000068 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 if (size == 1 && str != NULL &&
70 (op = characters[*str & UCHAR_MAX]) != NULL)
71 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072#ifdef COUNT_ALLOCS
73 one_strings++;
74#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000075 Py_INCREF(op);
76 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000078
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000079 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000080 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000083 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000085 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000086 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000087 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000088 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000089 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000097 PyObject *t = (PyObject *)op;
98 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000099 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000100 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000102 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000103 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000104}
105
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000106PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000107PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000108{
Tim Peters62de65b2001-12-06 20:29:32 +0000109 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000110 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000111
112 assert(str != NULL);
113 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000114 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000115 PyErr_SetString(PyExc_OverflowError,
116 "string is too long for a Python string");
117 return NULL;
118 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 if (size == 0 && (op = nullstring) != NULL) {
120#ifdef COUNT_ALLOCS
121 null_strings++;
122#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 Py_INCREF(op);
124 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125 }
126 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
127#ifdef COUNT_ALLOCS
128 one_strings++;
129#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000130 Py_INCREF(op);
131 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000132 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000134 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000135 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000136 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000137 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000138 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000140 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000141 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000142 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000150 PyObject *t = (PyObject *)op;
151 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000152 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000153 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000154 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000155 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000156 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000157}
158
Barry Warsawdadace02001-08-24 18:32:06 +0000159PyObject *
160PyString_FromFormatV(const char *format, va_list vargs)
161{
Tim Petersc15c4f12001-10-02 21:32:07 +0000162 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000163 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000164 const char* f;
165 char *s;
166 PyObject* string;
167
Tim Petersc15c4f12001-10-02 21:32:07 +0000168#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000169 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000170#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#ifdef __va_copy
172 __va_copy(count, vargs);
173#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000174 count = vargs;
175#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000176#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000177 /* step 1: figure out how large a buffer we need */
178 for (f = format; *f; f++) {
179 if (*f == '%') {
180 const char* p = f;
181 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
182 ;
183
Tim Peters8931ff12006-05-13 23:28:20 +0000184 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
185 * they don't affect the amount of space we reserve.
186 */
187 if ((*f == 'l' || *f == 'z') &&
188 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000189 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000190
Barry Warsawdadace02001-08-24 18:32:06 +0000191 switch (*f) {
192 case 'c':
193 (void)va_arg(count, int);
194 /* fall through... */
195 case '%':
196 n++;
197 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000198 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000199 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000200 /* 20 bytes is enough to hold a 64-bit
201 integer. Decimal takes the most space.
202 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000203 n += 20;
204 break;
205 case 's':
206 s = va_arg(count, char*);
207 n += strlen(s);
208 break;
209 case 'p':
210 (void) va_arg(count, int);
211 /* maximum 64-bit pointer representation:
212 * 0xffffffffffffffff
213 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000214 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000215 */
216 n += 19;
217 break;
218 default:
219 /* if we stumble upon an unknown
220 formatting code, copy the rest of
221 the format string to the output
222 string. (we cannot just skip the
223 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000224 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000225 n += strlen(p);
226 goto expand;
227 }
228 } else
229 n++;
230 }
231 expand:
232 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000233 /* Since we've analyzed how much space we need for the worst case,
234 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000235 string = PyString_FromStringAndSize(NULL, n);
236 if (!string)
237 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000238
Barry Warsawdadace02001-08-24 18:32:06 +0000239 s = PyString_AsString(string);
240
241 for (f = format; *f; f++) {
242 if (*f == '%') {
243 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000244 Py_ssize_t i;
245 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000246 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000247 /* parse the width.precision part (we're only
248 interested in the precision value, if any) */
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 if (*f == '.') {
253 f++;
254 n = 0;
255 while (isdigit(Py_CHARMASK(*f)))
256 n = (n*10) + *f++ - '0';
257 }
258 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
259 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000260 /* handle the long flag, but only for %ld and %lu.
261 others can be added when necessary. */
262 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000263 longflag = 1;
264 ++f;
265 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000266 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000267 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000268 size_tflag = 1;
269 ++f;
270 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000271
Barry Warsawdadace02001-08-24 18:32:06 +0000272 switch (*f) {
273 case 'c':
274 *s++ = va_arg(vargs, int);
275 break;
276 case 'd':
277 if (longflag)
278 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000279 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000280 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
281 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000282 else
283 sprintf(s, "%d", va_arg(vargs, int));
284 s += strlen(s);
285 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000286 case 'u':
287 if (longflag)
288 sprintf(s, "%lu",
289 va_arg(vargs, unsigned long));
290 else if (size_tflag)
291 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
292 va_arg(vargs, size_t));
293 else
294 sprintf(s, "%u",
295 va_arg(vargs, unsigned int));
296 s += strlen(s);
297 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000298 case 'i':
299 sprintf(s, "%i", va_arg(vargs, int));
300 s += strlen(s);
301 break;
302 case 'x':
303 sprintf(s, "%x", va_arg(vargs, int));
304 s += strlen(s);
305 break;
306 case 's':
307 p = va_arg(vargs, char*);
308 i = strlen(p);
309 if (n > 0 && i > n)
310 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000311 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000312 s += i;
313 break;
314 case 'p':
315 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000316 /* %p is ill-defined: ensure leading 0x. */
317 if (s[1] == 'X')
318 s[1] = 'x';
319 else if (s[1] != 'x') {
320 memmove(s+2, s, strlen(s)+1);
321 s[0] = '0';
322 s[1] = 'x';
323 }
Barry Warsawdadace02001-08-24 18:32:06 +0000324 s += strlen(s);
325 break;
326 case '%':
327 *s++ = '%';
328 break;
329 default:
330 strcpy(s, p);
331 s += strlen(s);
332 goto end;
333 }
334 } else
335 *s++ = *f;
336 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000339 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000340 return string;
341}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000342
Barry Warsawdadace02001-08-24 18:32:06 +0000343PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000344PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000345{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000346 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000347 va_list vargs;
348
349#ifdef HAVE_STDARG_PROTOTYPES
350 va_start(vargs, format);
351#else
352 va_start(vargs);
353#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000354 ret = PyString_FromFormatV(format, vargs);
355 va_end(vargs);
356 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000357}
358
359
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000361 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000362 const char *encoding,
363 const char *errors)
364{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 PyObject *v, *str;
366
367 str = PyString_FromStringAndSize(s, size);
368 if (str == NULL)
369 return NULL;
370 v = PyString_AsDecodedString(str, encoding, errors);
371 Py_DECREF(str);
372 return v;
373}
374
375PyObject *PyString_AsDecodedObject(PyObject *str,
376 const char *encoding,
377 const char *errors)
378{
379 PyObject *v;
380
381 if (!PyString_Check(str)) {
382 PyErr_BadArgument();
383 goto onError;
384 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000385
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000386 if (encoding == NULL) {
387#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000388 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000389#else
390 PyErr_SetString(PyExc_ValueError, "no encoding specified");
391 goto onError;
392#endif
393 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000394
395 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000396 v = PyCodec_Decode(str, encoding, errors);
397 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000398 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000399
400 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000401
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000402 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000403 return NULL;
404}
405
406PyObject *PyString_AsDecodedString(PyObject *str,
407 const char *encoding,
408 const char *errors)
409{
410 PyObject *v;
411
412 v = PyString_AsDecodedObject(str, encoding, errors);
413 if (v == NULL)
414 goto onError;
415
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000416#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417 /* Convert Unicode to a string using the default encoding */
418 if (PyUnicode_Check(v)) {
419 PyObject *temp = v;
420 v = PyUnicode_AsEncodedString(v, NULL, NULL);
421 Py_DECREF(temp);
422 if (v == NULL)
423 goto onError;
424 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000425#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000426 if (!PyString_Check(v)) {
427 PyErr_Format(PyExc_TypeError,
428 "decoder did not return a string object (type=%.400s)",
429 v->ob_type->tp_name);
430 Py_DECREF(v);
431 goto onError;
432 }
433
434 return v;
435
436 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 return NULL;
438}
439
440PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000441 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 const char *encoding,
443 const char *errors)
444{
445 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000446
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000447 str = PyString_FromStringAndSize(s, size);
448 if (str == NULL)
449 return NULL;
450 v = PyString_AsEncodedString(str, encoding, errors);
451 Py_DECREF(str);
452 return v;
453}
454
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000455PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 const char *encoding,
457 const char *errors)
458{
459 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000460
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000461 if (!PyString_Check(str)) {
462 PyErr_BadArgument();
463 goto onError;
464 }
465
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000466 if (encoding == NULL) {
467#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000469#else
470 PyErr_SetString(PyExc_ValueError, "no encoding specified");
471 goto onError;
472#endif
473 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000474
475 /* Encode via the codec registry */
476 v = PyCodec_Encode(str, encoding, errors);
477 if (v == NULL)
478 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000479
480 return v;
481
482 onError:
483 return NULL;
484}
485
486PyObject *PyString_AsEncodedString(PyObject *str,
487 const char *encoding,
488 const char *errors)
489{
490 PyObject *v;
491
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000492 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000493 if (v == NULL)
494 goto onError;
495
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000496#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000497 /* Convert Unicode to a string using the default encoding */
498 if (PyUnicode_Check(v)) {
499 PyObject *temp = v;
500 v = PyUnicode_AsEncodedString(v, NULL, NULL);
501 Py_DECREF(temp);
502 if (v == NULL)
503 goto onError;
504 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000505#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000506 if (!PyString_Check(v)) {
507 PyErr_Format(PyExc_TypeError,
508 "encoder did not return a string object (type=%.400s)",
509 v->ob_type->tp_name);
510 Py_DECREF(v);
511 goto onError;
512 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000513
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000514 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000515
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000516 onError:
517 return NULL;
518}
519
Guido van Rossum234f9421993-06-17 12:35:49 +0000520static void
Fred Drakeba096332000-07-09 07:04:36 +0000521string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000522{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000523 switch (PyString_CHECK_INTERNED(op)) {
524 case SSTATE_NOT_INTERNED:
525 break;
526
527 case SSTATE_INTERNED_MORTAL:
528 /* revive dead object temporarily for DelItem */
529 op->ob_refcnt = 3;
530 if (PyDict_DelItem(interned, op) != 0)
531 Py_FatalError(
532 "deletion of interned string failed");
533 break;
534
535 case SSTATE_INTERNED_IMMORTAL:
536 Py_FatalError("Immortal interned string died.");
537
538 default:
539 Py_FatalError("Inconsistent interned string state.");
540 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000541 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000542}
543
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000544/* Unescape a backslash-escaped string. If unicode is non-zero,
545 the string is a u-literal. If recode_encoding is non-zero,
546 the string is UTF-8 encoded and should be re-encoded in the
547 specified encoding. */
548
549PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000550 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000551 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000552 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000553 const char *recode_encoding)
554{
555 int c;
556 char *p, *buf;
557 const char *end;
558 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000559 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000560 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000561 if (v == NULL)
562 return NULL;
563 p = buf = PyString_AsString(v);
564 end = s + len;
565 while (s < end) {
566 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000567 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000568#ifdef Py_USING_UNICODE
569 if (recode_encoding && (*s & 0x80)) {
570 PyObject *u, *w;
571 char *r;
572 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000573 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000574 t = s;
575 /* Decode non-ASCII bytes as UTF-8. */
576 while (t < end && (*t & 0x80)) t++;
577 u = PyUnicode_DecodeUTF8(s, t - s, errors);
578 if(!u) goto failed;
579
580 /* Recode them in target encoding. */
581 w = PyUnicode_AsEncodedString(
582 u, recode_encoding, errors);
583 Py_DECREF(u);
584 if (!w) goto failed;
585
586 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000587 assert(PyString_Check(w));
588 r = PyString_AS_STRING(w);
589 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000590 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000591 p += rn;
592 Py_DECREF(w);
593 s = t;
594 } else {
595 *p++ = *s++;
596 }
597#else
598 *p++ = *s++;
599#endif
600 continue;
601 }
602 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000603 if (s==end) {
604 PyErr_SetString(PyExc_ValueError,
605 "Trailing \\ in string");
606 goto failed;
607 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000608 switch (*s++) {
609 /* XXX This assumes ASCII! */
610 case '\n': break;
611 case '\\': *p++ = '\\'; break;
612 case '\'': *p++ = '\''; break;
613 case '\"': *p++ = '\"'; break;
614 case 'b': *p++ = '\b'; break;
615 case 'f': *p++ = '\014'; break; /* FF */
616 case 't': *p++ = '\t'; break;
617 case 'n': *p++ = '\n'; break;
618 case 'r': *p++ = '\r'; break;
619 case 'v': *p++ = '\013'; break; /* VT */
620 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
621 case '0': case '1': case '2': case '3':
622 case '4': case '5': case '6': case '7':
623 c = s[-1] - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000624 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000625 c = (c<<3) + *s++ - '0';
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000626 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000627 c = (c<<3) + *s++ - '0';
628 }
629 *p++ = c;
630 break;
631 case 'x':
Georg Brandl1dcb9c92007-11-02 22:46:38 +0000632 if (s+1 < end &&
633 isxdigit(Py_CHARMASK(s[0])) &&
634 isxdigit(Py_CHARMASK(s[1])))
635 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000636 unsigned int x = 0;
637 c = Py_CHARMASK(*s);
638 s++;
639 if (isdigit(c))
640 x = c - '0';
641 else if (islower(c))
642 x = 10 + c - 'a';
643 else
644 x = 10 + c - 'A';
645 x = x << 4;
646 c = Py_CHARMASK(*s);
647 s++;
648 if (isdigit(c))
649 x += c - '0';
650 else if (islower(c))
651 x += 10 + c - 'a';
652 else
653 x += 10 + c - 'A';
654 *p++ = x;
655 break;
656 }
657 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000658 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000659 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000660 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000661 }
662 if (strcmp(errors, "replace") == 0) {
663 *p++ = '?';
664 } else if (strcmp(errors, "ignore") == 0)
665 /* do nothing */;
666 else {
667 PyErr_Format(PyExc_ValueError,
668 "decoding error; "
669 "unknown error handling code: %.400s",
670 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000671 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 }
673#ifndef Py_USING_UNICODE
674 case 'u':
675 case 'U':
676 case 'N':
677 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000678 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000679 "Unicode escapes not legal "
680 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000681 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683#endif
684 default:
685 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000686 s--;
687 goto non_esc; /* an arbitry number of unescaped
688 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000689 }
690 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000691 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000692 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000693 return v;
694 failed:
695 Py_DECREF(v);
696 return NULL;
697}
698
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000699/* -------------------------------------------------------------------- */
700/* object api */
701
Martin v. Löwis18e16552006-02-15 17:27:45 +0000702static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000703string_getsize(register PyObject *op)
704{
705 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000706 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000707 if (PyString_AsStringAndSize(op, &s, &len))
708 return -1;
709 return len;
710}
711
712static /*const*/ char *
713string_getbuffer(register PyObject *op)
714{
715 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000716 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000717 if (PyString_AsStringAndSize(op, &s, &len))
718 return NULL;
719 return s;
720}
721
Martin v. Löwis18e16552006-02-15 17:27:45 +0000722Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000723PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000725 if (!PyString_Check(op))
726 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000727 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000728}
729
730/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000731PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733 if (!PyString_Check(op))
734 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000736}
737
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000738int
739PyString_AsStringAndSize(register PyObject *obj,
740 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000741 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000742{
743 if (s == NULL) {
744 PyErr_BadInternalCall();
745 return -1;
746 }
747
748 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000750 if (PyUnicode_Check(obj)) {
751 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
752 if (obj == NULL)
753 return -1;
754 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000755 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000756#endif
757 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000758 PyErr_Format(PyExc_TypeError,
759 "expected string or Unicode object, "
760 "%.200s found", obj->ob_type->tp_name);
761 return -1;
762 }
763 }
764
765 *s = PyString_AS_STRING(obj);
766 if (len != NULL)
767 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000768 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000769 PyErr_SetString(PyExc_TypeError,
770 "expected string without null bytes");
771 return -1;
772 }
773 return 0;
774}
775
Fredrik Lundhaf722372006-05-25 17:55:31 +0000776/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000777/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000778
Fredrik Lundha50d2012006-05-26 17:04:58 +0000779#define STRINGLIB_CHAR char
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000780
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000781#define STRINGLIB_CMP memcmp
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000782#define STRINGLIB_LEN PyString_GET_SIZE
783#define STRINGLIB_NEW PyString_FromStringAndSize
784#define STRINGLIB_STR PyString_AS_STRING
Fredrik Lundhb3167cb2006-05-26 18:15:38 +0000785
Fredrik Lundhb9479482006-05-26 17:22:38 +0000786#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000787
Fredrik Lundha50d2012006-05-26 17:04:58 +0000788#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000789
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000790#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000791#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000792#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000793
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795static int
Fred Drakeba096332000-07-09 07:04:36 +0000796string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000798 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000799 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000800 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000801
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000802 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000803 if (! PyString_CheckExact(op)) {
804 int ret;
805 /* A str subclass may have its own __str__ method. */
806 op = (PyStringObject *) PyObject_Str((PyObject *)op);
807 if (op == NULL)
808 return -1;
809 ret = string_print(op, fp, flags);
810 Py_DECREF(op);
811 return ret;
812 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000813 if (flags & Py_PRINT_RAW) {
Armin Rigo4b63c212006-10-04 11:44:06 +0000814 char *data = op->ob_sval;
815 Py_ssize_t size = op->ob_size;
816 while (size > INT_MAX) {
817 /* Very long strings cannot be written atomically.
818 * But don't write exactly INT_MAX bytes at a time
819 * to avoid memory aligment issues.
820 */
821 const int chunk_size = INT_MAX & ~0x3FFF;
822 fwrite(data, 1, chunk_size, fp);
823 data += chunk_size;
824 size -= chunk_size;
825 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000826#ifdef __VMS
Armin Rigo4b63c212006-10-04 11:44:06 +0000827 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000828#else
Armin Rigo4b63c212006-10-04 11:44:06 +0000829 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000830#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000831 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000832 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000833
Thomas Wouters7e474022000-07-16 12:04:32 +0000834 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000835 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000836 if (memchr(op->ob_sval, '\'', op->ob_size) &&
837 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000838 quote = '"';
839
840 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000841 for (i = 0; i < op->ob_size; i++) {
842 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000843 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000844 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000845 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\r");
851 else if (c < ' ' || c >= 0x7f)
852 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000853 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000854 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000856 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000857 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000858}
859
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000860PyObject *
861PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000862{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000863 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000864 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000865 PyObject *v;
Armin Rigo4b63c212006-10-04 11:44:06 +0000866 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000867 PyErr_SetString(PyExc_OverflowError,
868 "string is too large to make repr");
Guido van Rossume6a6f392007-11-07 01:19:49 +0000869 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000870 }
871 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000872 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000873 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000874 }
875 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000876 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 register char c;
878 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000879 int quote;
880
Thomas Wouters7e474022000-07-16 12:04:32 +0000881 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000882 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000883 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000884 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000885 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000886 quote = '"';
887
Tim Peters9161c8b2001-12-03 01:55:38 +0000888 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000889 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000891 /* There's at least enough room for a hex escape
892 and a closing quote. */
893 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000894 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000895 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000896 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000897 else if (c == '\t')
898 *p++ = '\\', *p++ = 't';
899 else if (c == '\n')
900 *p++ = '\\', *p++ = 'n';
901 else if (c == '\r')
902 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000903 else if (c < ' ' || c >= 0x7f) {
904 /* For performance, we don't want to call
905 PyOS_snprintf here (extra layers of
906 function call). */
907 sprintf(p, "\\x%02x", c & 0xff);
908 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000909 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000910 else
911 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000913 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000914 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000916 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000917 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000918 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000919 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000920}
921
Guido van Rossum189f1df2001-05-01 16:51:53 +0000922static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000923string_repr(PyObject *op)
924{
925 return PyString_Repr(op, 1);
926}
927
928static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000929string_str(PyObject *s)
930{
Tim Petersc9933152001-10-16 20:18:24 +0000931 assert(PyString_Check(s));
932 if (PyString_CheckExact(s)) {
933 Py_INCREF(s);
934 return s;
935 }
936 else {
937 /* Subtype -- return genuine string with the same value. */
938 PyStringObject *t = (PyStringObject *) s;
939 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
940 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000941}
942
Martin v. Löwis18e16552006-02-15 17:27:45 +0000943static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000944string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000945{
946 return a->ob_size;
947}
948
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000949static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000950string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000951{
Andrew Dalke598710c2006-05-25 18:18:39 +0000952 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000953 register PyStringObject *op;
954 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000955#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000956 if (PyUnicode_Check(bb))
957 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000958#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000959 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000960 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000961 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000962 return NULL;
963 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000964#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000966 if ((a->ob_size == 0 || b->ob_size == 0) &&
967 PyString_CheckExact(a) && PyString_CheckExact(b)) {
968 if (a->ob_size == 0) {
969 Py_INCREF(bb);
970 return bb;
971 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000972 Py_INCREF(a);
973 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000974 }
975 size = a->ob_size + b->ob_size;
Andrew Dalke598710c2006-05-25 18:18:39 +0000976 if (size < 0) {
977 PyErr_SetString(PyExc_OverflowError,
978 "strings are too large to concat");
979 return NULL;
980 }
981
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000982 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000983 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000984 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000985 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000986 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000987 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000988 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000989 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
990 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000991 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000992 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993#undef b
994}
995
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000997string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000999 register Py_ssize_t i;
1000 register Py_ssize_t j;
1001 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001002 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001003 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001004 if (n < 0)
1005 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001006 /* watch out for overflows: the size can overflow int,
1007 * and the # of bytes needed can overflow size_t
1008 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001009 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001010 if (n && size / n != a->ob_size) {
1011 PyErr_SetString(PyExc_OverflowError,
1012 "repeated string is too long");
1013 return NULL;
1014 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001015 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001016 Py_INCREF(a);
1017 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001018 }
Tim Peterse7c05322004-06-27 17:24:49 +00001019 nbytes = (size_t)size;
1020 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001021 PyErr_SetString(PyExc_OverflowError,
1022 "repeated string is too long");
1023 return NULL;
1024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001025 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001026 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001027 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001029 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001030 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001031 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001032 op->ob_sval[size] = '\0';
1033 if (a->ob_size == 1 && n > 0) {
1034 memset(op->ob_sval, a->ob_sval[0] , n);
1035 return (PyObject *) op;
1036 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001037 i = 0;
1038 if (i < size) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001039 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001040 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001041 }
1042 while (i < size) {
1043 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001044 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001045 i += j;
1046 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001047 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001048}
1049
1050/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1051
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001052static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001053string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001054 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001055 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001056{
1057 if (i < 0)
1058 i = 0;
1059 if (j < 0)
1060 j = 0; /* Avoid signed/unsigned bug in next line */
1061 if (j > a->ob_size)
1062 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001063 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1064 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001065 Py_INCREF(a);
1066 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067 }
1068 if (j < i)
1069 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001070 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001071}
1072
Guido van Rossum9284a572000-03-07 15:53:43 +00001073static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001074string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001075{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001076 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001077#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001078 if (PyUnicode_Check(sub_obj))
1079 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001080#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 if (!PyString_Check(sub_obj)) {
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001082 PyErr_SetString(PyExc_TypeError,
1083 "'in <string>' requires string as left operand");
1084 return -1;
1085 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001086 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001087
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001088 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001089}
1090
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001091static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001092string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001094 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001095 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001096 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001097 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001098 return NULL;
1099 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001100 pchar = a->ob_sval[i];
1101 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001102 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001103 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001104 else {
1105#ifdef COUNT_ALLOCS
1106 one_strings++;
1107#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001108 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001109 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001110 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001111}
1112
Martin v. Löwiscd353062001-05-24 16:56:35 +00001113static PyObject*
1114string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001117 Py_ssize_t len_a, len_b;
1118 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001119 PyObject *result;
1120
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001121 /* Make sure both arguments are strings. */
1122 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 result = Py_NotImplemented;
1124 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001125 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001126 if (a == b) {
1127 switch (op) {
1128 case Py_EQ:case Py_LE:case Py_GE:
1129 result = Py_True;
1130 goto out;
1131 case Py_NE:case Py_LT:case Py_GT:
1132 result = Py_False;
1133 goto out;
1134 }
1135 }
1136 if (op == Py_EQ) {
1137 /* Supporting Py_NE here as well does not save
1138 much time, since Py_NE is rarely used. */
1139 if (a->ob_size == b->ob_size
1140 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001141 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001142 a->ob_size) == 0)) {
1143 result = Py_True;
1144 } else {
1145 result = Py_False;
1146 }
1147 goto out;
1148 }
1149 len_a = a->ob_size; len_b = b->ob_size;
1150 min_len = (len_a < len_b) ? len_a : len_b;
1151 if (min_len > 0) {
1152 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1153 if (c==0)
1154 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1155 }else
1156 c = 0;
1157 if (c == 0)
1158 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1159 switch (op) {
1160 case Py_LT: c = c < 0; break;
1161 case Py_LE: c = c <= 0; break;
1162 case Py_EQ: assert(0); break; /* unreachable */
1163 case Py_NE: c = c != 0; break;
1164 case Py_GT: c = c > 0; break;
1165 case Py_GE: c = c >= 0; break;
1166 default:
1167 result = Py_NotImplemented;
1168 goto out;
1169 }
1170 result = c ? Py_True : Py_False;
1171 out:
1172 Py_INCREF(result);
1173 return result;
1174}
1175
1176int
1177_PyString_Eq(PyObject *o1, PyObject *o2)
1178{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001179 PyStringObject *a = (PyStringObject*) o1;
1180 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001181 return a->ob_size == b->ob_size
1182 && *a->ob_sval == *b->ob_sval
1183 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001184}
1185
Guido van Rossum9bfef441993-03-29 10:43:31 +00001186static long
Fred Drakeba096332000-07-09 07:04:36 +00001187string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001188{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001189 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001190 register unsigned char *p;
1191 register long x;
1192
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 if (a->ob_shash != -1)
1194 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001195 len = a->ob_size;
1196 p = (unsigned char *) a->ob_sval;
1197 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001198 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001199 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001200 x ^= a->ob_size;
1201 if (x == -1)
1202 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001203 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001204 return x;
1205}
1206
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001207static PyObject*
1208string_subscript(PyStringObject* self, PyObject* item)
1209{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001210 if (PyIndex_Check(item)) {
1211 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001212 if (i == -1 && PyErr_Occurred())
1213 return NULL;
1214 if (i < 0)
1215 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001216 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001217 }
1218 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001219 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 char* source_buf;
1221 char* result_buf;
1222 PyObject* result;
1223
Tim Petersae1d0c92006-03-17 03:29:34 +00001224 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001225 PyString_GET_SIZE(self),
1226 &start, &stop, &step, &slicelength) < 0) {
1227 return NULL;
1228 }
1229
1230 if (slicelength <= 0) {
1231 return PyString_FromStringAndSize("", 0);
1232 }
1233 else {
1234 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001235 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001236 if (result_buf == NULL)
1237 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001238
Tim Petersae1d0c92006-03-17 03:29:34 +00001239 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001240 cur += step, i++) {
1241 result_buf[i] = source_buf[cur];
1242 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001243
1244 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001245 slicelength);
1246 PyMem_Free(result_buf);
1247 return result;
1248 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001249 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001250 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001251 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001252 "string indices must be integers");
1253 return NULL;
1254 }
1255}
1256
Martin v. Löwis18e16552006-02-15 17:27:45 +00001257static Py_ssize_t
1258string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001259{
1260 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001261 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001262 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001263 return -1;
1264 }
1265 *ptr = (void *)self->ob_sval;
1266 return self->ob_size;
1267}
1268
Martin v. Löwis18e16552006-02-15 17:27:45 +00001269static Py_ssize_t
1270string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001271{
Guido van Rossum045e6881997-09-08 18:30:11 +00001272 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001273 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274 return -1;
1275}
1276
Martin v. Löwis18e16552006-02-15 17:27:45 +00001277static Py_ssize_t
1278string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001279{
1280 if ( lenp )
1281 *lenp = self->ob_size;
1282 return 1;
1283}
1284
Martin v. Löwis18e16552006-02-15 17:27:45 +00001285static Py_ssize_t
1286string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001287{
1288 if ( index != 0 ) {
1289 PyErr_SetString(PyExc_SystemError,
1290 "accessing non-existent string segment");
1291 return -1;
1292 }
1293 *ptr = self->ob_sval;
1294 return self->ob_size;
1295}
1296
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001297static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001298 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001299 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001300 (ssizeargfunc)string_repeat, /*sq_repeat*/
1301 (ssizeargfunc)string_item, /*sq_item*/
1302 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001303 0, /*sq_ass_item*/
1304 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001305 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001306};
1307
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001308static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001309 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001310 (binaryfunc)string_subscript,
1311 0,
1312};
1313
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001314static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001315 (readbufferproc)string_buffer_getreadbuf,
1316 (writebufferproc)string_buffer_getwritebuf,
1317 (segcountproc)string_buffer_getsegcount,
1318 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001319};
1320
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321
1322
/* Selectors for the three strip variants. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for a selector: the format entry minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001331
Andrew Dalke525eab32006-05-26 14:00:45 +00001332
/* Non-zero if `pattern' (of `length' bytes) occurs in `target' at
   `offset'.  The first and last bytes are compared before memcmp is
   called on the interior bytes.

   Don't call if length < 2: the interior length passed to memcmp is
   length-2.

   Every argument is parenthesized so that expression arguments expand
   correctly; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)        \
    ((target)[(offset)] == (pattern)[0] &&                      \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&  \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1338
1339
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a given split limit: one more
   than the number of splits (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The argument is parenthesized so that expression
   arguments expand correctly. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1352
/* Append the substring data[left:right] to `list'.

   Expects these names in the expanding scope: `str' (scratch
   PyObject *), `list' (the list being built) and the label `onError',
   which is jumped to when creating or appending the string fails. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);
1364
/* Add the substring data[left:right] as element number `count' of
   `list', then increment `count'.

   While count is below MAX_PREALLOC the new string is stored straight
   into slot `count' with PyList_SET_ITEM; beyond that it is appended
   with PyList_Append.

   Expects these names in the expanding scope: `str', `list', `count'
   and the label `onError' (jumped to on failure). */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001381
/* Always force the list to the expected size: ob_size is overwritten
   with `count', which must be in scope where the macro is expanded.
   The argument is parenthesized so the cast applies to the whole
   expression passed in. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
Andrew Dalke525eab32006-05-26 14:00:45 +00001384
/* Index-scanning helpers for whitespace splitting.  Each one moves the
   index `i' (which must be a modifiable lvalue) through the buffer `s'
   while the isspace() condition holds: the forward variants stop at
   `len', the reverse variants stop once `i' drops below 0.
   Arguments are parenthesized so expression arguments expand
   correctly; they are evaluated repeatedly. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
1389
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001390Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001391split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001392{
Andrew Dalke525eab32006-05-26 14:00:45 +00001393 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001394 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001395 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396
1397 if (list == NULL)
1398 return NULL;
1399
Andrew Dalke02758d62006-05-26 15:21:01 +00001400 i = j = 0;
1401
1402 while (maxsplit-- > 0) {
1403 SKIP_SPACE(s, i, len);
1404 if (i==len) break;
1405 j = i; i++;
1406 SKIP_NONSPACE(s, i, len);
1407 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001408 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001409
1410 if (i < len) {
1411 /* Only occurs when maxsplit was reached */
1412 /* Skip any remaining whitespace and copy to end of string */
1413 SKIP_SPACE(s, i, len);
1414 if (i != len)
1415 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001416 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001417 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001419 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420 Py_DECREF(list);
1421 return NULL;
1422}
1423
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001424Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001425split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001426{
Andrew Dalke525eab32006-05-26 14:00:45 +00001427 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001428 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001429 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430
1431 if (list == NULL)
1432 return NULL;
1433
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001434 i = j = 0;
1435 while ((j < len) && (maxcount-- > 0)) {
1436 for(; j<len; j++) {
1437 /* I found that using memchr makes no difference */
1438 if (s[j] == ch) {
1439 SPLIT_ADD(s, i, j);
1440 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001441 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001442 }
1443 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001444 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001445 if (i <= len) {
1446 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001447 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001448 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001449 return list;
1450
1451 onError:
1452 Py_DECREF(list);
1453 return NULL;
1454}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001456PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457"S.split([sep [,maxsplit]]) -> list of strings\n\
1458\n\
1459Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001460delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001461splits are done. If sep is not specified or is None, any\n\
1462whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001463
1464static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001465string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001466{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001467 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001468 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001469 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001470 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001471#ifdef USE_FAST
1472 Py_ssize_t pos;
1473#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474
Martin v. Löwis9c830762006-04-13 08:37:17 +00001475 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001477 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001478 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001479 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001480 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 if (PyString_Check(subobj)) {
1482 sub = PyString_AS_STRING(subobj);
1483 n = PyString_GET_SIZE(subobj);
1484 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001485#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001486 else if (PyUnicode_Check(subobj))
1487 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001488#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001489 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1490 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001491
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492 if (n == 0) {
1493 PyErr_SetString(PyExc_ValueError, "empty separator");
1494 return NULL;
1495 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001496 else if (n == 1)
1497 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498
Andrew Dalke525eab32006-05-26 14:00:45 +00001499 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500 if (list == NULL)
1501 return NULL;
1502
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001503#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001505 while (maxsplit-- > 0) {
1506 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1507 if (pos < 0)
1508 break;
1509 j = i+pos;
1510 SPLIT_ADD(s, i, j);
1511 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001512 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001513#else
1514 i = j = 0;
1515 while ((j+n <= len) && (maxsplit-- > 0)) {
1516 for (; j+n <= len; j++) {
1517 if (Py_STRING_MATCH(s, j, sub, n)) {
1518 SPLIT_ADD(s, i, j);
1519 i = j = j + n;
1520 break;
1521 }
1522 }
1523 }
1524#endif
1525 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001526 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527 return list;
1528
Andrew Dalke525eab32006-05-26 14:00:45 +00001529 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530 Py_DECREF(list);
1531 return NULL;
1532}
1533
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001534PyDoc_STRVAR(partition__doc__,
1535"S.partition(sep) -> (head, sep, tail)\n\
1536\n\
1537Searches for the separator sep in S, and returns the part before it,\n\
1538the separator itself, and the part after it. If the separator is not\n\
1539found, returns S and two empty strings.");
1540
1541static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001542string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001543{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001544 const char *sep;
1545 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001546
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001547 if (PyString_Check(sep_obj)) {
1548 sep = PyString_AS_STRING(sep_obj);
1549 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001550 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001551#ifdef Py_USING_UNICODE
1552 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001553 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001554#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001555 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001556 return NULL;
1557
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001558 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001559 (PyObject*) self,
1560 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1561 sep_obj, sep, sep_len
1562 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001563}
1564
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001565PyDoc_STRVAR(rpartition__doc__,
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001566"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001567\n\
1568Searches for the separator sep in S, starting at the end of S, and returns\n\
1569the part before it, the separator itself, and the part after it. If the\n\
Neal Norwitz29a5fdb2006-09-05 02:21:38 +00001570separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001571
1572static PyObject *
1573string_rpartition(PyStringObject *self, PyObject *sep_obj)
1574{
1575 const char *sep;
1576 Py_ssize_t sep_len;
1577
1578 if (PyString_Check(sep_obj)) {
1579 sep = PyString_AS_STRING(sep_obj);
1580 sep_len = PyString_GET_SIZE(sep_obj);
1581 }
1582#ifdef Py_USING_UNICODE
1583 else if (PyUnicode_Check(sep_obj))
1584 return PyUnicode_Partition((PyObject *) self, sep_obj);
1585#endif
1586 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1587 return NULL;
1588
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001589 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001590 (PyObject*) self,
1591 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1592 sep_obj, sep, sep_len
1593 );
1594}
1595
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001596Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001597rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001598{
Andrew Dalke525eab32006-05-26 14:00:45 +00001599 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001600 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001601 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001602
1603 if (list == NULL)
1604 return NULL;
1605
Andrew Dalke02758d62006-05-26 15:21:01 +00001606 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001607
Andrew Dalke02758d62006-05-26 15:21:01 +00001608 while (maxsplit-- > 0) {
1609 RSKIP_SPACE(s, i);
1610 if (i<0) break;
1611 j = i; i--;
1612 RSKIP_NONSPACE(s, i);
1613 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001614 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001615 if (i >= 0) {
1616 /* Only occurs when maxsplit was reached */
1617 /* Skip any remaining whitespace and copy to beginning of string */
1618 RSKIP_SPACE(s, i);
1619 if (i >= 0)
1620 SPLIT_ADD(s, 0, i + 1);
1621
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001622 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001623 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001624 if (PyList_Reverse(list) < 0)
1625 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001626 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001627 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001628 Py_DECREF(list);
1629 return NULL;
1630}
1631
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001632Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis83687c92006-04-13 08:52:56 +00001633rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001634{
Andrew Dalke525eab32006-05-26 14:00:45 +00001635 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001636 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001637 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001638
1639 if (list == NULL)
1640 return NULL;
1641
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001642 i = j = len - 1;
1643 while ((i >= 0) && (maxcount-- > 0)) {
1644 for (; i >= 0; i--) {
1645 if (s[i] == ch) {
1646 SPLIT_ADD(s, i + 1, j + 1);
1647 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001648 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001649 }
1650 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001651 }
1652 if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001653 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001654 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001655 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001656 if (PyList_Reverse(list) < 0)
1657 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001658 return list;
1659
1660 onError:
1661 Py_DECREF(list);
1662 return NULL;
1663}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001664
1665PyDoc_STRVAR(rsplit__doc__,
1666"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1667\n\
1668Return a list of the words in the string S, using sep as the\n\
1669delimiter string, starting at the end of the string and working\n\
1670to the front. If maxsplit is given, at most maxsplit splits are\n\
1671done. If sep is not specified or is None, any whitespace string\n\
1672is a separator.");
1673
1674static PyObject *
1675string_rsplit(PyStringObject *self, PyObject *args)
1676{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001677 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001678 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001679 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001680 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001681
Martin v. Löwis9c830762006-04-13 08:37:17 +00001682 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001683 return NULL;
1684 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001685 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001686 if (subobj == Py_None)
1687 return rsplit_whitespace(s, len, maxsplit);
1688 if (PyString_Check(subobj)) {
1689 sub = PyString_AS_STRING(subobj);
1690 n = PyString_GET_SIZE(subobj);
1691 }
1692#ifdef Py_USING_UNICODE
1693 else if (PyUnicode_Check(subobj))
1694 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1695#endif
1696 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1697 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001698
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001699 if (n == 0) {
1700 PyErr_SetString(PyExc_ValueError, "empty separator");
1701 return NULL;
1702 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001703 else if (n == 1)
1704 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001705
Andrew Dalke525eab32006-05-26 14:00:45 +00001706 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001707 if (list == NULL)
1708 return NULL;
1709
1710 j = len;
1711 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001712
1713 while ( (i >= 0) && (maxsplit-- > 0) ) {
1714 for (; i>=0; i--) {
1715 if (Py_STRING_MATCH(s, i, sub, n)) {
1716 SPLIT_ADD(s, i + n, j);
1717 j = i;
1718 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001719 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001720 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001721 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001723 SPLIT_ADD(s, 0, j);
1724 FIX_PREALLOC_SIZE(list);
1725 if (PyList_Reverse(list) < 0)
1726 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001727 return list;
1728
Andrew Dalke525eab32006-05-26 14:00:45 +00001729onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001730 Py_DECREF(list);
1731 return NULL;
1732}
1733
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001734
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001735PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736"S.join(sequence) -> string\n\
1737\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001738Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001739sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740
1741static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001742string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001743{
1744 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001745 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001748 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001749 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001750 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001751 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752
Tim Peters19fe14e2001-01-19 03:03:47 +00001753 seq = PySequence_Fast(orig, "");
1754 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001755 return NULL;
1756 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001757
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001758 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001759 if (seqlen == 0) {
1760 Py_DECREF(seq);
1761 return PyString_FromString("");
1762 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001764 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001765 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1766 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001767 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001768 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001769 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001771
Raymond Hettinger674f2412004-08-23 23:23:54 +00001772 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001773 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001774 * Do a pre-pass to figure out the total amount of space we'll
1775 * need (sz), see whether any argument is absurd, and defer to
1776 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001777 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001779 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001780 item = PySequence_Fast_GET_ITEM(seq, i);
1781 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001782#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001783 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001784 /* Defer to Unicode join.
1785 * CAUTION: There's no gurantee that the
1786 * original sequence can be iterated over
1787 * again, so we must pass seq here.
1788 */
1789 PyObject *result;
1790 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001791 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001792 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001793 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001794#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001795 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001796 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001797 " %.80s found",
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001798 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001799 Py_DECREF(seq);
1800 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001801 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001802 sz += PyString_GET_SIZE(item);
1803 if (i != 0)
1804 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001805 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001806 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001807 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001808 Py_DECREF(seq);
1809 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001811 }
1812
1813 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001814 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001815 if (res == NULL) {
1816 Py_DECREF(seq);
1817 return NULL;
1818 }
1819
1820 /* Catenate everything. */
1821 p = PyString_AS_STRING(res);
1822 for (i = 0; i < seqlen; ++i) {
1823 size_t n;
1824 item = PySequence_Fast_GET_ITEM(seq, i);
1825 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001826 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001827 p += n;
1828 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001829 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001830 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001831 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001832 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001833
Jeremy Hylton49048292000-07-11 03:28:17 +00001834 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836}
1837
Tim Peters52e155e2001-06-16 05:42:57 +00001838PyObject *
1839_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001840{
Tim Petersa7259592001-06-16 05:11:17 +00001841 assert(sep != NULL && PyString_Check(sep));
1842 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001843 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001844}
1845
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001846Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001847string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001848{
1849 if (*end > len)
1850 *end = len;
1851 else if (*end < 0)
1852 *end += len;
1853 if (*end < 0)
1854 *end = 0;
1855 if (*start < 0)
1856 *start += len;
1857 if (*start < 0)
1858 *start = 0;
1859}
1860
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001861Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001862string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001865 const char *sub;
1866 Py_ssize_t sub_len;
1867 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001868
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001869 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1870 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 return -2;
1872 if (PyString_Check(subobj)) {
1873 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001874 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001875 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001876#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001878 return PyUnicode_Find(
1879 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001880#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001881 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001882 /* XXX - the "expected a character buffer object" is pretty
1883 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001884 return -2;
1885
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001886 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001887 return stringlib_find_slice(
1888 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1889 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001890 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001891 return stringlib_rfind_slice(
1892 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1893 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894}
1895
1896
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001897PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898"S.find(sub [,start [,end]]) -> int\n\
1899\n\
1900Return the lowest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001901such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001902arguments start and end are interpreted as in slice notation.\n\
1903\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001904Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905
1906static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001907string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001909 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001910 if (result == -2)
1911 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001912 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913}
1914
1915
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001916PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917"S.index(sub [,start [,end]]) -> int\n\
1918\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001919Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920
1921static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001922string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001923{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001924 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 if (result == -2)
1926 return NULL;
1927 if (result == -1) {
1928 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001929 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001930 return NULL;
1931 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001932 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933}
1934
1935
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001936PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001937"S.rfind(sub [,start [,end]]) -> int\n\
1938\n\
1939Return the highest index in S where substring sub is found,\n\
Georg Brandlb4d100c2007-07-29 17:37:22 +00001940such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941arguments start and end are interpreted as in slice notation.\n\
1942\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001943Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944
1945static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001946string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001948 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001949 if (result == -2)
1950 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001951 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001952}
1953
1954
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001955PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001956"S.rindex(sub [,start [,end]]) -> int\n\
1957\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001958Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959
1960static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001961string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001963 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964 if (result == -2)
1965 return NULL;
1966 if (result == -1) {
1967 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001968 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001969 return NULL;
1970 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001971 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001972}
1973
1974
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001975Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1977{
1978 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001979 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001980 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001981 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1982 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001983
1984 i = 0;
1985 if (striptype != RIGHTSTRIP) {
1986 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1987 i++;
1988 }
1989 }
1990
1991 j = len;
1992 if (striptype != LEFTSTRIP) {
1993 do {
1994 j--;
1995 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1996 j++;
1997 }
1998
1999 if (i == 0 && j == len && PyString_CheckExact(self)) {
2000 Py_INCREF(self);
2001 return (PyObject*)self;
2002 }
2003 else
2004 return PyString_FromStringAndSize(s+i, j-i);
2005}
2006
2007
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002008Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002009do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002010{
2011 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002012 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002014 i = 0;
2015 if (striptype != RIGHTSTRIP) {
2016 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2017 i++;
2018 }
2019 }
2020
2021 j = len;
2022 if (striptype != LEFTSTRIP) {
2023 do {
2024 j--;
2025 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2026 j++;
2027 }
2028
Tim Peters8fa5dd02001-09-12 02:18:30 +00002029 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002030 Py_INCREF(self);
2031 return (PyObject*)self;
2032 }
2033 else
2034 return PyString_FromStringAndSize(s+i, j-i);
2035}
2036
2037
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002038Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002039do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2040{
2041 PyObject *sep = NULL;
2042
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002043 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002044 return NULL;
2045
2046 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002047 if (PyString_Check(sep))
2048 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002049#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002050 else if (PyUnicode_Check(sep)) {
2051 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2052 PyObject *res;
2053 if (uniself==NULL)
2054 return NULL;
2055 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2056 striptype, sep);
2057 Py_DECREF(uniself);
2058 return res;
2059 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002060#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002061 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002062#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002063 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002064#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002065 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002066#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002067 STRIPNAME(striptype));
2068 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002069 }
2070
2071 return do_strip(self, striptype);
2072}
2073
2074
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002075PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002076"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077\n\
2078Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002080If chars is given and not None, remove characters in chars instead.\n\
2081If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082
2083static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002084string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002086 if (PyTuple_GET_SIZE(args) == 0)
2087 return do_strip(self, BOTHSTRIP); /* Common case */
2088 else
2089 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090}
2091
2092
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002093PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002094"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002097If chars is given and not None, remove characters in chars instead.\n\
2098If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099
2100static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002101string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002103 if (PyTuple_GET_SIZE(args) == 0)
2104 return do_strip(self, LEFTSTRIP); /* Common case */
2105 else
2106 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107}
2108
2109
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002110PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002111"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002114If chars is given and not None, remove characters in chars instead.\n\
2115If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116
2117static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002118string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002120 if (PyTuple_GET_SIZE(args) == 0)
2121 return do_strip(self, RIGHTSTRIP); /* Common case */
2122 else
2123 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002124}
2125
2126
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002127PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128"S.lower() -> string\n\
2129\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002130Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002132/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2133#ifndef _tolower
2134#define _tolower tolower
2135#endif
2136
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002137static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002138string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002140 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002141 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002142 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002144 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002145 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002146 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002147
2148 s = PyString_AS_STRING(newobj);
2149
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002150 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002151
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002153 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002154 if (isupper(c))
2155 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002157
Anthony Baxtera6286212006-04-11 07:42:36 +00002158 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159}
2160
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002161PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002162"S.upper() -> string\n\
2163\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002164Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002166#ifndef _toupper
2167#define _toupper toupper
2168#endif
2169
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002171string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002173 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002174 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002175 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002177 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002178 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002180
2181 s = PyString_AS_STRING(newobj);
2182
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002183 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002184
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002186 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002187 if (islower(c))
2188 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002189 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002190
Anthony Baxtera6286212006-04-11 07:42:36 +00002191 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192}
2193
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002194PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002195"S.title() -> string\n\
2196\n\
2197Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002198characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002199
2200static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002201string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202{
2203 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002204 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002206 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207
Anthony Baxtera6286212006-04-11 07:42:36 +00002208 newobj = PyString_FromStringAndSize(NULL, n);
2209 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002211 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002212 for (i = 0; i < n; i++) {
2213 int c = Py_CHARMASK(*s++);
2214 if (islower(c)) {
2215 if (!previous_is_cased)
2216 c = toupper(c);
2217 previous_is_cased = 1;
2218 } else if (isupper(c)) {
2219 if (previous_is_cased)
2220 c = tolower(c);
2221 previous_is_cased = 1;
2222 } else
2223 previous_is_cased = 0;
2224 *s_new++ = c;
2225 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002226 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002227}
2228
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002229PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230"S.capitalize() -> string\n\
2231\n\
2232Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002233capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234
2235static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002236string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002237{
2238 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002239 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002240 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241
Anthony Baxtera6286212006-04-11 07:42:36 +00002242 newobj = PyString_FromStringAndSize(NULL, n);
2243 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002244 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002245 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246 if (0 < n) {
2247 int c = Py_CHARMASK(*s++);
2248 if (islower(c))
2249 *s_new = toupper(c);
2250 else
2251 *s_new = c;
2252 s_new++;
2253 }
2254 for (i = 1; i < n; i++) {
2255 int c = Py_CHARMASK(*s++);
2256 if (isupper(c))
2257 *s_new = tolower(c);
2258 else
2259 *s_new = c;
2260 s_new++;
2261 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002262 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263}
2264
2265
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002266PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267"S.count(sub[, start[, end]]) -> int\n\
2268\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002269Return the number of non-overlapping occurrences of substring sub in\n\
2270string S[start:end]. Optional arguments start and end are interpreted\n\
2271as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002272
2273static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002274string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002276 PyObject *sub_obj;
2277 const char *str = PyString_AS_STRING(self), *sub;
2278 Py_ssize_t sub_len;
2279 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002280
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002281 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2282 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002283 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002284
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002285 if (PyString_Check(sub_obj)) {
2286 sub = PyString_AS_STRING(sub_obj);
2287 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002288 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002289#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002290 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002291 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002292 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002293 if (count == -1)
2294 return NULL;
2295 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002296 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002297 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002298#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002299 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002300 return NULL;
2301
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002302 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002303
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002304 return PyInt_FromSsize_t(
2305 stringlib_count(str + start, end - start, sub, sub_len)
2306 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002307}
2308
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002309PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002310"S.swapcase() -> string\n\
2311\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002313converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002314
2315static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002316string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002317{
2318 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002319 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002320 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321
Anthony Baxtera6286212006-04-11 07:42:36 +00002322 newobj = PyString_FromStringAndSize(NULL, n);
2323 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002325 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326 for (i = 0; i < n; i++) {
2327 int c = Py_CHARMASK(*s++);
2328 if (islower(c)) {
2329 *s_new = toupper(c);
2330 }
2331 else if (isupper(c)) {
2332 *s_new = tolower(c);
2333 }
2334 else
2335 *s_new = c;
2336 s_new++;
2337 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002338 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339}
2340
2341
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002342PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002343"S.translate(table [,deletechars]) -> string\n\
2344\n\
2345Return a copy of the string S, where all characters occurring\n\
2346in the optional argument deletechars are removed, and the\n\
2347remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002348translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002349
2350static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002351string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002352{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002353 register char *input, *output;
2354 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002355 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002356 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002357 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002358 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002359 PyObject *result;
2360 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002361 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002362
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002363 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002364 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002366
2367 if (PyString_Check(tableobj)) {
2368 table1 = PyString_AS_STRING(tableobj);
2369 tablen = PyString_GET_SIZE(tableobj);
2370 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002371#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002372 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002373 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 parameter; instead a mapping to None will cause characters
2375 to be deleted. */
2376 if (delobj != NULL) {
2377 PyErr_SetString(PyExc_TypeError,
2378 "deletions are implemented differently for unicode");
2379 return NULL;
2380 }
2381 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2382 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002383#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002384 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002385 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002386
Martin v. Löwis00b61272002-12-12 20:03:19 +00002387 if (tablen != 256) {
2388 PyErr_SetString(PyExc_ValueError,
2389 "translation table must be 256 characters long");
2390 return NULL;
2391 }
2392
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 if (delobj != NULL) {
2394 if (PyString_Check(delobj)) {
2395 del_table = PyString_AS_STRING(delobj);
2396 dellen = PyString_GET_SIZE(delobj);
2397 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002398#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002399 else if (PyUnicode_Check(delobj)) {
2400 PyErr_SetString(PyExc_TypeError,
2401 "deletions are implemented differently for unicode");
2402 return NULL;
2403 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002404#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002405 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2406 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002407 }
2408 else {
2409 del_table = NULL;
2410 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002411 }
2412
2413 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002414 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002415 result = PyString_FromStringAndSize((char *)NULL, inlen);
2416 if (result == NULL)
2417 return NULL;
2418 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002419 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002420
2421 if (dellen == 0) {
2422 /* If no deletions are required, use faster code */
2423 for (i = inlen; --i >= 0; ) {
2424 c = Py_CHARMASK(*input++);
2425 if (Py_CHARMASK((*output++ = table[c])) != c)
2426 changed = 1;
2427 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002428 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 return result;
2430 Py_DECREF(result);
2431 Py_INCREF(input_obj);
2432 return input_obj;
2433 }
2434
2435 for (i = 0; i < 256; i++)
2436 trans_table[i] = Py_CHARMASK(table[i]);
2437
2438 for (i = 0; i < dellen; i++)
2439 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2440
2441 for (i = inlen; --i >= 0; ) {
2442 c = Py_CHARMASK(*input++);
2443 if (trans_table[c] != -1)
2444 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2445 continue;
2446 changed = 1;
2447 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002448 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449 Py_DECREF(result);
2450 Py_INCREF(input_obj);
2451 return input_obj;
2452 }
2453 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002454 if (inlen > 0)
2455 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002456 return result;
2457}
2458
2459
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002460#define FORWARD 1
2461#define REVERSE -1
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002462
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002463/* find and count characters and substrings */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002464
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002465#define findchar(target, target_len, c) \
2466 ((char *)memchr((const void *)(target), c, target_len))
2467
2468/* String ops must return a string. */
2469/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002470Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002471return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002473 if (PyString_CheckExact(self)) {
2474 Py_INCREF(self);
2475 return self;
2476 }
2477 return (PyStringObject *)PyString_FromStringAndSize(
2478 PyString_AS_STRING(self),
2479 PyString_GET_SIZE(self));
2480}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002481
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002482Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002483countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002484{
2485 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002486 const char *start=target;
2487 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002488
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002489 while ( (start=findchar(start, end-start, c)) != NULL ) {
2490 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002491 if (count >= maxcount)
2492 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002493 start += 1;
2494 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002495 return count;
2496}
2497
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002498Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002499findstring(const char *target, Py_ssize_t target_len,
2500 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002501 Py_ssize_t start,
2502 Py_ssize_t end,
2503 int direction)
2504{
2505 if (start < 0) {
2506 start += target_len;
2507 if (start < 0)
2508 start = 0;
2509 }
2510 if (end > target_len) {
2511 end = target_len;
2512 } else if (end < 0) {
2513 end += target_len;
2514 if (end < 0)
2515 end = 0;
2516 }
2517
2518 /* zero-length substrings always match at the first attempt */
2519 if (pattern_len == 0)
2520 return (direction > 0) ? start : end;
2521
2522 end -= pattern_len;
2523
2524 if (direction < 0) {
2525 for (; end >= start; end--)
2526 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2527 return end;
2528 } else {
2529 for (; start <= end; start++)
2530 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2531 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002532 }
2533 return -1;
2534}
2535
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002536Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002537countstring(const char *target, Py_ssize_t target_len,
2538 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002539 Py_ssize_t start,
2540 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002541 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002542{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002543 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002544
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002545 if (start < 0) {
2546 start += target_len;
2547 if (start < 0)
2548 start = 0;
2549 }
2550 if (end > target_len) {
2551 end = target_len;
2552 } else if (end < 0) {
2553 end += target_len;
2554 if (end < 0)
2555 end = 0;
2556 }
2557
2558 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002559 if (pattern_len == 0 || maxcount == 0) {
2560 if (target_len+1 < maxcount)
2561 return target_len+1;
2562 return maxcount;
2563 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002564
2565 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002566 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002567 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002568 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2569 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002570 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002571 end -= pattern_len-1;
2572 }
2573 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002574 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002575 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2576 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002577 if (--maxcount <= 0)
2578 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002579 start += pattern_len-1;
2580 }
2581 }
2582 return count;
2583}
2584
2585
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002586/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002587
2588/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002589Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002590replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002591 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002592 Py_ssize_t maxcount)
2593{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002594 char *self_s, *result_s;
2595 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002596 Py_ssize_t count, i, product;
2597 PyStringObject *result;
2598
2599 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002600
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002601 /* 1 at the end plus 1 after every character */
2602 count = self_len+1;
2603 if (maxcount < count)
2604 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002605
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002606 /* Check for overflow */
2607 /* result_len = count * to_len + self_len; */
2608 product = count * to_len;
2609 if (product / to_len != count) {
2610 PyErr_SetString(PyExc_OverflowError,
2611 "replace string is too long");
2612 return NULL;
2613 }
2614 result_len = product + self_len;
2615 if (result_len < 0) {
2616 PyErr_SetString(PyExc_OverflowError,
2617 "replace string is too long");
2618 return NULL;
2619 }
2620
2621 if (! (result = (PyStringObject *)
2622 PyString_FromStringAndSize(NULL, result_len)) )
2623 return NULL;
2624
2625 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002626 result_s = PyString_AS_STRING(result);
2627
2628 /* TODO: special case single character, which doesn't need memcpy */
2629
2630 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002631 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002632 result_s += to_len;
2633 count -= 1;
2634
2635 for (i=0; i<count; i++) {
2636 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002637 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002638 result_s += to_len;
2639 }
2640
2641 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002642 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002643
2644 return result;
2645}
2646
2647/* Special case for deleting a single character */
2648/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002649Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002650replace_delete_single_character(PyStringObject *self,
2651 char from_c, Py_ssize_t maxcount)
2652{
2653 char *self_s, *result_s;
2654 char *start, *next, *end;
2655 Py_ssize_t self_len, result_len;
2656 Py_ssize_t count;
2657 PyStringObject *result;
2658
2659 self_len = PyString_GET_SIZE(self);
2660 self_s = PyString_AS_STRING(self);
2661
Andrew Dalke51324072006-05-26 20:25:22 +00002662 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002663 if (count == 0) {
2664 return return_self(self);
2665 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002666
2667 result_len = self_len - count; /* from_len == 1 */
2668 assert(result_len>=0);
2669
2670 if ( (result = (PyStringObject *)
2671 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2672 return NULL;
2673 result_s = PyString_AS_STRING(result);
2674
2675 start = self_s;
2676 end = self_s + self_len;
2677 while (count-- > 0) {
2678 next = findchar(start, end-start, from_c);
2679 if (next == NULL)
2680 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002681 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002682 result_s += (next-start);
2683 start = next+1;
2684 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002685 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002686
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002687 return result;
2688}
2689
2690/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2691
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002692Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002693replace_delete_substring(PyStringObject *self,
2694 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002695 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002696 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002697 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002698 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002699 Py_ssize_t count, offset;
2700 PyStringObject *result;
2701
2702 self_len = PyString_GET_SIZE(self);
2703 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002704
2705 count = countstring(self_s, self_len,
2706 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002707 0, self_len, 1,
2708 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002709
2710 if (count == 0) {
2711 /* no matches */
2712 return return_self(self);
2713 }
2714
2715 result_len = self_len - (count * from_len);
2716 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002717
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002718 if ( (result = (PyStringObject *)
2719 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2720 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002721
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002722 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002723
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002724 start = self_s;
2725 end = self_s + self_len;
2726 while (count-- > 0) {
2727 offset = findstring(start, end-start,
2728 from_s, from_len,
2729 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002730 if (offset == -1)
2731 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002732 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002733
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002734 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002735
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002736 result_s += (next-start);
2737 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002738 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002739 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002740 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002741}
2742
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002743/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002744Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002745replace_single_character_in_place(PyStringObject *self,
2746 char from_c, char to_c,
2747 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002748{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002749 char *self_s, *result_s, *start, *end, *next;
2750 Py_ssize_t self_len;
2751 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002752
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002753 /* The result string will be the same size */
2754 self_s = PyString_AS_STRING(self);
2755 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002756
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002757 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002758
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002759 if (next == NULL) {
2760 /* No matches; return the original string */
2761 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002762 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002763
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002764 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002765 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002766 if (result == NULL)
2767 return NULL;
2768 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002769 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002770
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002771 /* change everything in-place, starting with this one */
2772 start = result_s + (next-self_s);
2773 *start = to_c;
2774 start++;
2775 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002776
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002777 while (--maxcount > 0) {
2778 next = findchar(start, end-start, from_c);
2779 if (next == NULL)
2780 break;
2781 *next = to_c;
2782 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002783 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002784
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002785 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002786}
2787
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002788/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002789Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002790replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002791 const char *from_s, Py_ssize_t from_len,
2792 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002793 Py_ssize_t maxcount)
2794{
2795 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002796 char *self_s;
2797 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002798 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002799
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002800 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002801
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002802 self_s = PyString_AS_STRING(self);
2803 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002804
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002805 offset = findstring(self_s, self_len,
2806 from_s, from_len,
2807 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808 if (offset == -1) {
2809 /* No matches; return the original string */
2810 return return_self(self);
2811 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002812
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002813 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002814 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002815 if (result == NULL)
2816 return NULL;
2817 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002818 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002819
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002820 /* change everything in-place, starting with this one */
2821 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002822 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002823 start += from_len;
2824 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002825
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002826 while ( --maxcount > 0) {
2827 offset = findstring(start, end-start,
2828 from_s, from_len,
2829 0, end-start, FORWARD);
2830 if (offset==-1)
2831 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002832 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002833 start += offset+from_len;
2834 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002835
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 return result;
2837}
2838
2839/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002840Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002841replace_single_character(PyStringObject *self,
2842 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002843 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002844 Py_ssize_t maxcount)
2845{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002846 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002847 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002848 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002849 Py_ssize_t count, product;
2850 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002851
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002852 self_s = PyString_AS_STRING(self);
2853 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002854
Andrew Dalke51324072006-05-26 20:25:22 +00002855 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002856 if (count == 0) {
2857 /* no matches, return unchanged */
2858 return return_self(self);
2859 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002860
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002861 /* use the difference between current and new, hence the "-1" */
2862 /* result_len = self_len + count * (to_len-1) */
2863 product = count * (to_len-1);
2864 if (product / (to_len-1) != count) {
2865 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2866 return NULL;
2867 }
2868 result_len = self_len + product;
2869 if (result_len < 0) {
2870 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2871 return NULL;
2872 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002873
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002874 if ( (result = (PyStringObject *)
2875 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2876 return NULL;
2877 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002878
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002879 start = self_s;
2880 end = self_s + self_len;
2881 while (count-- > 0) {
2882 next = findchar(start, end-start, from_c);
2883 if (next == NULL)
2884 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002885
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002886 if (next == start) {
2887 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002888 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002889 result_s += to_len;
2890 start += 1;
2891 } else {
2892 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002893 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002894 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002895 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002896 result_s += to_len;
2897 start = next+1;
2898 }
2899 }
2900 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002901 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002902
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002903 return result;
2904}
2905
2906/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002907Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002908replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002909 const char *from_s, Py_ssize_t from_len,
2910 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002911 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002912 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002913 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002914 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002915 Py_ssize_t count, offset, product;
2916 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002917
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002918 self_s = PyString_AS_STRING(self);
2919 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002920
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002921 count = countstring(self_s, self_len,
2922 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002923 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002924 if (count == 0) {
2925 /* no matches, return unchanged */
2926 return return_self(self);
2927 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002928
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002929 /* Check for overflow */
2930 /* result_len = self_len + count * (to_len-from_len) */
2931 product = count * (to_len-from_len);
2932 if (product / (to_len-from_len) != count) {
2933 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2934 return NULL;
2935 }
2936 result_len = self_len + product;
2937 if (result_len < 0) {
2938 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2939 return NULL;
2940 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002941
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002942 if ( (result = (PyStringObject *)
2943 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2944 return NULL;
2945 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002946
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002947 start = self_s;
2948 end = self_s + self_len;
2949 while (count-- > 0) {
2950 offset = findstring(start, end-start,
2951 from_s, from_len,
2952 0, end-start, FORWARD);
2953 if (offset == -1)
2954 break;
2955 next = start+offset;
2956 if (next == start) {
2957 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002958 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002959 result_s += to_len;
2960 start += from_len;
2961 } else {
2962 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002963 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002964 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002965 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002966 result_s += to_len;
2967 start = next+from_len;
2968 }
2969 }
2970 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002971 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002972
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973 return result;
2974}
2975
2976
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002977Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002979 const char *from_s, Py_ssize_t from_len,
2980 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002981 Py_ssize_t maxcount)
2982{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002983 if (maxcount < 0) {
2984 maxcount = PY_SSIZE_T_MAX;
2985 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2986 /* nothing to do; return the original string */
2987 return return_self(self);
2988 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002989
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002990 if (maxcount == 0 ||
2991 (from_len == 0 && to_len == 0)) {
2992 /* nothing to do; return the original string */
2993 return return_self(self);
2994 }
2995
2996 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00002997
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002998 if (from_len == 0) {
2999 /* insert the 'to' string everywhere. */
3000 /* >>> "Python".replace("", ".") */
3001 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003002 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003003 }
3004
3005 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3006 /* point for an empty self string to generate a non-empty string */
3007 /* Special case so the remaining code always gets a non-empty string */
3008 if (PyString_GET_SIZE(self) == 0) {
3009 return return_self(self);
3010 }
3011
3012 if (to_len == 0) {
3013 /* delete all occurances of 'from' string */
3014 if (from_len == 1) {
3015 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003016 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003017 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003018 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003019 }
3020 }
3021
3022 /* Handle special case where both strings have the same length */
3023
3024 if (from_len == to_len) {
3025 if (from_len == 1) {
3026 return replace_single_character_in_place(
3027 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003028 from_s[0],
3029 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003030 maxcount);
3031 } else {
3032 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003033 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003034 }
3035 }
3036
3037 /* Otherwise use the more generic algorithms */
3038 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003039 return replace_single_character(self, from_s[0],
3040 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003041 } else {
3042 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003043 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003044 }
3045}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003046
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003047PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003048"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003049\n\
3050Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003051old replaced by new. If the optional argument count is\n\
3052given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003053
3054static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003055string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003057 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003058 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003059 const char *from_s, *to_s;
3060 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003061
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003062 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003063 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003064
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003065 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003066 from_s = PyString_AS_STRING(from);
3067 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003068 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003069#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003070 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003071 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003072 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003073#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003074 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075 return NULL;
3076
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003077 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003078 to_s = PyString_AS_STRING(to);
3079 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003080 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003081#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003082 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003083 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003084 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003085#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003086 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003087 return NULL;
3088
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003089 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003090 from_s, from_len,
3091 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003092}
3093
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003094/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003095
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003096/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003097 * against substr, using the start and end arguments. Returns
3098 * -1 on error, 0 if not found and 1 if found.
3099 */
3100Py_LOCAL(int)
3101_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3102 Py_ssize_t end, int direction)
3103{
3104 Py_ssize_t len = PyString_GET_SIZE(self);
3105 Py_ssize_t slen;
3106 const char* sub;
3107 const char* str;
3108
3109 if (PyString_Check(substr)) {
3110 sub = PyString_AS_STRING(substr);
3111 slen = PyString_GET_SIZE(substr);
3112 }
3113#ifdef Py_USING_UNICODE
3114 else if (PyUnicode_Check(substr))
3115 return PyUnicode_Tailmatch((PyObject *)self,
3116 substr, start, end, direction);
3117#endif
3118 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3119 return -1;
3120 str = PyString_AS_STRING(self);
3121
3122 string_adjust_indices(&start, &end, len);
3123
3124 if (direction < 0) {
3125 /* startswith */
3126 if (start+slen > len)
3127 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003128 } else {
3129 /* endswith */
3130 if (end-start < slen || start > len)
3131 return 0;
3132
3133 if (end-slen > start)
3134 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003135 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003136 if (end-start >= slen)
3137 return ! memcmp(str+start, sub, slen);
3138 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003139}
3140
3141
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003142PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003143"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003144\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003145Return True if S starts with the specified prefix, False otherwise.\n\
3146With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003147With optional end, stop comparing S at that position.\n\
3148prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003149
3150static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003151string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003153 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003154 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003155 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003156 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003157
Guido van Rossumc6821402000-05-08 14:08:05 +00003158 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3159 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003160 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003161 if (PyTuple_Check(subobj)) {
3162 Py_ssize_t i;
3163 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3164 result = _string_tailmatch(self,
3165 PyTuple_GET_ITEM(subobj, i),
3166 start, end, -1);
3167 if (result == -1)
3168 return NULL;
3169 else if (result) {
3170 Py_RETURN_TRUE;
3171 }
3172 }
3173 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003174 }
Georg Brandl24250812006-06-09 18:45:48 +00003175 result = _string_tailmatch(self, subobj, start, end, -1);
3176 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003177 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003178 else
Georg Brandl24250812006-06-09 18:45:48 +00003179 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003180}
3181
3182
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003183PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003184"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003185\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003186Return True if S ends with the specified suffix, False otherwise.\n\
3187With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003188With optional end, stop comparing S at that position.\n\
3189suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003190
3191static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003192string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003193{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003194 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003195 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003196 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003197 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003198
Guido van Rossumc6821402000-05-08 14:08:05 +00003199 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3200 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003201 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003202 if (PyTuple_Check(subobj)) {
3203 Py_ssize_t i;
3204 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3205 result = _string_tailmatch(self,
3206 PyTuple_GET_ITEM(subobj, i),
3207 start, end, +1);
3208 if (result == -1)
3209 return NULL;
3210 else if (result) {
3211 Py_RETURN_TRUE;
3212 }
3213 }
3214 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003215 }
Georg Brandl24250812006-06-09 18:45:48 +00003216 result = _string_tailmatch(self, subobj, start, end, +1);
3217 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003218 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003219 else
Georg Brandl24250812006-06-09 18:45:48 +00003220 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003221}
3222
3223
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003224PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003225"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003226\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003227Encodes S using the codec registered for encoding. encoding defaults\n\
3228to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003229handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003230a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3231'xmlcharrefreplace' as well as any other name registered with\n\
3232codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003233
3234static PyObject *
3235string_encode(PyStringObject *self, PyObject *args)
3236{
3237 char *encoding = NULL;
3238 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003239 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003240
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003241 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3242 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003243 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003244 if (v == NULL)
3245 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003246 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3247 PyErr_Format(PyExc_TypeError,
3248 "encoder did not return a string/unicode object "
3249 "(type=%.400s)",
3250 v->ob_type->tp_name);
3251 Py_DECREF(v);
3252 return NULL;
3253 }
3254 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003255
3256 onError:
3257 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003258}
3259
3260
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003261PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003262"S.decode([encoding[,errors]]) -> object\n\
3263\n\
3264Decodes S using the codec registered for encoding. encoding defaults\n\
3265to the default encoding. errors may be given to set a different error\n\
3266handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003267a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3268as well as any other name registerd with codecs.register_error that is\n\
3269able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003270
3271static PyObject *
3272string_decode(PyStringObject *self, PyObject *args)
3273{
3274 char *encoding = NULL;
3275 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003276 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003277
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003278 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3279 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003280 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003281 if (v == NULL)
3282 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003283 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3284 PyErr_Format(PyExc_TypeError,
3285 "decoder did not return a string/unicode object "
3286 "(type=%.400s)",
3287 v->ob_type->tp_name);
3288 Py_DECREF(v);
3289 return NULL;
3290 }
3291 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003292
3293 onError:
3294 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003295}
3296
3297
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003298PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003299"S.expandtabs([tabsize]) -> string\n\
3300\n\
3301Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003302If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003303
3304static PyObject*
3305string_expandtabs(PyStringObject *self, PyObject *args)
3306{
Guido van Rossum44a93e52008-03-11 21:14:54 +00003307 const char *e, *p, *qe;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003308 char *q;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003309 Py_ssize_t i, j, incr;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003310 PyObject *u;
3311 int tabsize = 8;
3312
3313 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3314 return NULL;
3315
Thomas Wouters7e474022000-07-16 12:04:32 +00003316 /* First pass: determine size of output string */
Guido van Rossum44a93e52008-03-11 21:14:54 +00003317 i = 0; /* chars up to and including most recent \n or \r */
3318 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3319 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003320 for (p = PyString_AS_STRING(self); p < e; p++)
3321 if (*p == '\t') {
Neal Norwitz66e64e22007-06-09 04:06:30 +00003322 if (tabsize > 0) {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003323 incr = tabsize - (j % tabsize);
3324 if (j > PY_SSIZE_T_MAX - incr)
3325 goto overflow1;
3326 j += incr;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003327 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003328 }
3329 else {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003330 if (j > PY_SSIZE_T_MAX - 1)
3331 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003332 j++;
3333 if (*p == '\n' || *p == '\r') {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003334 if (i > PY_SSIZE_T_MAX - j)
3335 goto overflow1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003336 i += j;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003337 j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003338 }
3339 }
3340
Guido van Rossum44a93e52008-03-11 21:14:54 +00003341 if (i > PY_SSIZE_T_MAX - j)
3342 goto overflow1;
Neal Norwitz66e64e22007-06-09 04:06:30 +00003343
Guido van Rossum4c08d552000-03-10 22:55:18 +00003344 /* Second pass: create output string and fill it */
3345 u = PyString_FromStringAndSize(NULL, i + j);
3346 if (!u)
3347 return NULL;
3348
Guido van Rossum44a93e52008-03-11 21:14:54 +00003349 j = 0; /* same as in first pass */
3350 q = PyString_AS_STRING(u); /* next output char */
3351 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003352
3353 for (p = PyString_AS_STRING(self); p < e; p++)
3354 if (*p == '\t') {
3355 if (tabsize > 0) {
3356 i = tabsize - (j % tabsize);
3357 j += i;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003358 while (i--) {
3359 if (q >= qe)
3360 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003361 *q++ = ' ';
Guido van Rossum44a93e52008-03-11 21:14:54 +00003362 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003363 }
3364 }
3365 else {
Guido van Rossum44a93e52008-03-11 21:14:54 +00003366 if (q >= qe)
3367 goto overflow2;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003368 *q++ = *p;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003369 j++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003370 if (*p == '\n' || *p == '\r')
3371 j = 0;
3372 }
3373
3374 return u;
Guido van Rossum44a93e52008-03-11 21:14:54 +00003375
3376 overflow2:
3377 Py_DECREF(u);
3378 overflow1:
3379 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3380 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003381}
3382
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003383Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003384pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003385{
3386 PyObject *u;
3387
3388 if (left < 0)
3389 left = 0;
3390 if (right < 0)
3391 right = 0;
3392
Tim Peters8fa5dd02001-09-12 02:18:30 +00003393 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003394 Py_INCREF(self);
3395 return (PyObject *)self;
3396 }
3397
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003398 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003399 left + PyString_GET_SIZE(self) + right);
3400 if (u) {
3401 if (left)
3402 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003403 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003404 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003405 PyString_GET_SIZE(self));
3406 if (right)
3407 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3408 fill, right);
3409 }
3410
3411 return u;
3412}
3413
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003414PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003415"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003416"\n"
3417"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003418"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003419
3420static PyObject *
3421string_ljust(PyStringObject *self, PyObject *args)
3422{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003423 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003424 char fillchar = ' ';
3425
Thomas Wouters4abb3662006-04-19 14:50:15 +00003426 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003427 return NULL;
3428
Tim Peters8fa5dd02001-09-12 02:18:30 +00003429 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430 Py_INCREF(self);
3431 return (PyObject*) self;
3432 }
3433
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003434 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003435}
3436
3437
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003438PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003439"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003440"\n"
3441"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003442"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003443
3444static PyObject *
3445string_rjust(PyStringObject *self, PyObject *args)
3446{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003447 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003448 char fillchar = ' ';
3449
Thomas Wouters4abb3662006-04-19 14:50:15 +00003450 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003451 return NULL;
3452
Tim Peters8fa5dd02001-09-12 02:18:30 +00003453 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003454 Py_INCREF(self);
3455 return (PyObject*) self;
3456 }
3457
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003458 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003459}
3460
3461
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003462PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003463"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003464"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003465"Return S centered in a string of length width. Padding is\n"
3466"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003467
3468static PyObject *
3469string_center(PyStringObject *self, PyObject *args)
3470{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003471 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003472 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003473 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003474
Thomas Wouters4abb3662006-04-19 14:50:15 +00003475 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003476 return NULL;
3477
Tim Peters8fa5dd02001-09-12 02:18:30 +00003478 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003479 Py_INCREF(self);
3480 return (PyObject*) self;
3481 }
3482
3483 marg = width - PyString_GET_SIZE(self);
3484 left = marg / 2 + (marg & width & 1);
3485
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003486 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003487}
3488
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003489PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003490"S.zfill(width) -> string\n"
3491"\n"
3492"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003493"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003494
3495static PyObject *
3496string_zfill(PyStringObject *self, PyObject *args)
3497{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003498 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003499 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003500 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003501 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003502
Thomas Wouters4abb3662006-04-19 14:50:15 +00003503 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003504 return NULL;
3505
3506 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003507 if (PyString_CheckExact(self)) {
3508 Py_INCREF(self);
3509 return (PyObject*) self;
3510 }
3511 else
3512 return PyString_FromStringAndSize(
3513 PyString_AS_STRING(self),
3514 PyString_GET_SIZE(self)
3515 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003516 }
3517
3518 fill = width - PyString_GET_SIZE(self);
3519
3520 s = pad(self, fill, 0, '0');
3521
3522 if (s == NULL)
3523 return NULL;
3524
3525 p = PyString_AS_STRING(s);
3526 if (p[fill] == '+' || p[fill] == '-') {
3527 /* move sign to beginning of string */
3528 p[0] = p[fill];
3529 p[fill] = '0';
3530 }
3531
3532 return (PyObject*) s;
3533}
3534
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003535PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003536"S.isspace() -> bool\n\
3537\n\
3538Return True if all characters in S are whitespace\n\
3539and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003540
3541static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003542string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003543{
Fred Drakeba096332000-07-09 07:04:36 +00003544 register const unsigned char *p
3545 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003546 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003547
Guido van Rossum4c08d552000-03-10 22:55:18 +00003548 /* Shortcut for single character strings */
3549 if (PyString_GET_SIZE(self) == 1 &&
3550 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003551 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003552
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003553 /* Special case for empty strings */
3554 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003555 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003556
Guido van Rossum4c08d552000-03-10 22:55:18 +00003557 e = p + PyString_GET_SIZE(self);
3558 for (; p < e; p++) {
3559 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003560 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003561 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003562 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003563}
3564
3565
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003566PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003567"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003568\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003569Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003570and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003571
3572static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003573string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003574{
Fred Drakeba096332000-07-09 07:04:36 +00003575 register const unsigned char *p
3576 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003577 register const unsigned char *e;
3578
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003579 /* Shortcut for single character strings */
3580 if (PyString_GET_SIZE(self) == 1 &&
3581 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003582 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003583
3584 /* Special case for empty strings */
3585 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003586 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003587
3588 e = p + PyString_GET_SIZE(self);
3589 for (; p < e; p++) {
3590 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003591 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003592 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003593 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003594}
3595
3596
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003597PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003598"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003599\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003600Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003601and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003602
3603static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003604string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003605{
Fred Drakeba096332000-07-09 07:04:36 +00003606 register const unsigned char *p
3607 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003608 register const unsigned char *e;
3609
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003610 /* Shortcut for single character strings */
3611 if (PyString_GET_SIZE(self) == 1 &&
3612 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003613 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003614
3615 /* Special case for empty strings */
3616 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003617 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003618
3619 e = p + PyString_GET_SIZE(self);
3620 for (; p < e; p++) {
3621 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003622 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003623 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003624 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003625}
3626
3627
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003628PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003629"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003630\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003631Return True if all characters in S are digits\n\
3632and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633
3634static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003635string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003636{
Fred Drakeba096332000-07-09 07:04:36 +00003637 register const unsigned char *p
3638 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003639 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640
Guido van Rossum4c08d552000-03-10 22:55:18 +00003641 /* Shortcut for single character strings */
3642 if (PyString_GET_SIZE(self) == 1 &&
3643 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003645
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003646 /* Special case for empty strings */
3647 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003648 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003649
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650 e = p + PyString_GET_SIZE(self);
3651 for (; p < e; p++) {
3652 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003653 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003654 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003655 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003656}
3657
3658
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003659PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003660"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003661\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003662Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003663at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003664
3665static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003666string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003667{
Fred Drakeba096332000-07-09 07:04:36 +00003668 register const unsigned char *p
3669 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003670 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003671 int cased;
3672
Guido van Rossum4c08d552000-03-10 22:55:18 +00003673 /* Shortcut for single character strings */
3674 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003675 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003676
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003677 /* Special case for empty strings */
3678 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003679 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003680
Guido van Rossum4c08d552000-03-10 22:55:18 +00003681 e = p + PyString_GET_SIZE(self);
3682 cased = 0;
3683 for (; p < e; p++) {
3684 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003685 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686 else if (!cased && islower(*p))
3687 cased = 1;
3688 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003689 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003690}
3691
3692
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003693PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003694"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003695\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003696Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003697at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698
3699static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003700string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003701{
Fred Drakeba096332000-07-09 07:04:36 +00003702 register const unsigned char *p
3703 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003704 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003705 int cased;
3706
Guido van Rossum4c08d552000-03-10 22:55:18 +00003707 /* Shortcut for single character strings */
3708 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003709 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003710
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003711 /* Special case for empty strings */
3712 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003713 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003714
Guido van Rossum4c08d552000-03-10 22:55:18 +00003715 e = p + PyString_GET_SIZE(self);
3716 cased = 0;
3717 for (; p < e; p++) {
3718 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003719 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 else if (!cased && isupper(*p))
3721 cased = 1;
3722 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003723 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003724}
3725
3726
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003727PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003728"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003730Return True if S is a titlecased string and there is at least one\n\
3731character in S, i.e. uppercase characters may only follow uncased\n\
3732characters and lowercase characters only cased ones. Return False\n\
3733otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734
3735static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003736string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003737{
Fred Drakeba096332000-07-09 07:04:36 +00003738 register const unsigned char *p
3739 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003740 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003741 int cased, previous_is_cased;
3742
Guido van Rossum4c08d552000-03-10 22:55:18 +00003743 /* Shortcut for single character strings */
3744 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003745 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003746
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003747 /* Special case for empty strings */
3748 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003749 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003750
Guido van Rossum4c08d552000-03-10 22:55:18 +00003751 e = p + PyString_GET_SIZE(self);
3752 cased = 0;
3753 previous_is_cased = 0;
3754 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003755 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756
3757 if (isupper(ch)) {
3758 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003759 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003760 previous_is_cased = 1;
3761 cased = 1;
3762 }
3763 else if (islower(ch)) {
3764 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003765 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003766 previous_is_cased = 1;
3767 cased = 1;
3768 }
3769 else
3770 previous_is_cased = 0;
3771 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003772 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773}
3774
3775
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003776PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003777"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003778\n\
3779Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003780Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003781is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782
Guido van Rossum4c08d552000-03-10 22:55:18 +00003783static PyObject*
3784string_splitlines(PyStringObject *self, PyObject *args)
3785{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003786 register Py_ssize_t i;
3787 register Py_ssize_t j;
3788 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003789 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003790 PyObject *list;
3791 PyObject *str;
3792 char *data;
3793
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003794 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003795 return NULL;
3796
3797 data = PyString_AS_STRING(self);
3798 len = PyString_GET_SIZE(self);
3799
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003800 /* This does not use the preallocated list because splitlines is
3801 usually run with hundreds of newlines. The overhead of
3802 switching between PyList_SET_ITEM and append causes about a
3803 2-3% slowdown for that common case. A smarter implementation
3804 could move the if check out, so the SET_ITEMs are done first
3805 and the appends only done when the prealloc buffer is full.
3806 That's too much work for little gain.*/
3807
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808 list = PyList_New(0);
3809 if (!list)
3810 goto onError;
3811
3812 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003813 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003814
Guido van Rossum4c08d552000-03-10 22:55:18 +00003815 /* Find a line and append it */
3816 while (i < len && data[i] != '\n' && data[i] != '\r')
3817 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003818
3819 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003820 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003821 if (i < len) {
3822 if (data[i] == '\r' && i + 1 < len &&
3823 data[i+1] == '\n')
3824 i += 2;
3825 else
3826 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003827 if (keepends)
3828 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003829 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003830 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003831 j = i;
3832 }
3833 if (j < len) {
3834 SPLIT_APPEND(data, j, len);
3835 }
3836
3837 return list;
3838
3839 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003840 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003841 return NULL;
3842}
3843
3844#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003845#undef SPLIT_ADD
3846#undef MAX_PREALLOC
3847#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003848
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003849static PyObject *
3850string_getnewargs(PyStringObject *v)
3851{
3852 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3853}
3854
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003855
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003856static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003857string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003858 /* Counterparts of the obsolete stropmodule functions; except
3859 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003860 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3861 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003862 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003863 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3864 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003865 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3866 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3867 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3868 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3869 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3870 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3871 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003872 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3873 capitalize__doc__},
3874 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3875 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3876 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003877 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003878 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3879 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3880 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3881 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3882 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3883 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3884 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003885 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3886 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003887 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3888 startswith__doc__},
3889 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3890 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3891 swapcase__doc__},
3892 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3893 translate__doc__},
3894 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3895 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3896 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3897 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3898 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3899 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3900 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3901 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3902 expandtabs__doc__},
3903 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3904 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003905 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003906 {NULL, NULL} /* sentinel */
3907};
3908
Jeremy Hylton938ace62002-07-17 16:30:39 +00003909static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003910str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3911
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003912static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003913string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003914{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003915 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003916 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003917
Guido van Rossumae960af2001-08-30 03:11:59 +00003918 if (type != &PyString_Type)
3919 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003920 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3921 return NULL;
3922 if (x == NULL)
3923 return PyString_FromString("");
3924 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003925}
3926
Guido van Rossumae960af2001-08-30 03:11:59 +00003927static PyObject *
3928str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3929{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003930 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003931 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003932
3933 assert(PyType_IsSubtype(type, &PyString_Type));
3934 tmp = string_new(&PyString_Type, args, kwds);
3935 if (tmp == NULL)
3936 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003937 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003938 n = PyString_GET_SIZE(tmp);
3939 pnew = type->tp_alloc(type, n);
3940 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003941 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003942 ((PyStringObject *)pnew)->ob_shash =
3943 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003944 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003945 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003946 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003947 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003948}
3949
Guido van Rossumcacfc072002-05-24 19:01:59 +00003950static PyObject *
3951basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3952{
3953 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003954 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003955 return NULL;
3956}
3957
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003958static PyObject *
3959string_mod(PyObject *v, PyObject *w)
3960{
3961 if (!PyString_Check(v)) {
3962 Py_INCREF(Py_NotImplemented);
3963 return Py_NotImplemented;
3964 }
3965 return PyString_Format(v, w);
3966}
3967
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003968PyDoc_STRVAR(basestring_doc,
3969"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003970
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003971static PyNumberMethods string_as_number = {
3972 0, /*nb_add*/
3973 0, /*nb_subtract*/
3974 0, /*nb_multiply*/
3975 0, /*nb_divide*/
3976 string_mod, /*nb_remainder*/
3977};
3978
3979
Guido van Rossumcacfc072002-05-24 19:01:59 +00003980PyTypeObject PyBaseString_Type = {
3981 PyObject_HEAD_INIT(&PyType_Type)
3982 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003983 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003984 0,
3985 0,
3986 0, /* tp_dealloc */
3987 0, /* tp_print */
3988 0, /* tp_getattr */
3989 0, /* tp_setattr */
3990 0, /* tp_compare */
3991 0, /* tp_repr */
3992 0, /* tp_as_number */
3993 0, /* tp_as_sequence */
3994 0, /* tp_as_mapping */
3995 0, /* tp_hash */
3996 0, /* tp_call */
3997 0, /* tp_str */
3998 0, /* tp_getattro */
3999 0, /* tp_setattro */
4000 0, /* tp_as_buffer */
4001 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4002 basestring_doc, /* tp_doc */
4003 0, /* tp_traverse */
4004 0, /* tp_clear */
4005 0, /* tp_richcompare */
4006 0, /* tp_weaklistoffset */
4007 0, /* tp_iter */
4008 0, /* tp_iternext */
4009 0, /* tp_methods */
4010 0, /* tp_members */
4011 0, /* tp_getset */
4012 &PyBaseObject_Type, /* tp_base */
4013 0, /* tp_dict */
4014 0, /* tp_descr_get */
4015 0, /* tp_descr_set */
4016 0, /* tp_dictoffset */
4017 0, /* tp_init */
4018 0, /* tp_alloc */
4019 basestring_new, /* tp_new */
4020 0, /* tp_free */
4021};
4022
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004023PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004024"str(object) -> string\n\
4025\n\
4026Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004027If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004028
/* Type object for the built-in "str" type.  The slots are positional;
   each one is labelled with the PyTypeObject field it initializes.
   Slots set to 0 are left unset here. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"str",					/* tp_name */
	sizeof(PyStringObject),			/* tp_basicsize */
	sizeof(char),				/* tp_itemsize */
	string_dealloc, 			/* tp_dealloc */
	(printfunc)string_print, 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	string_repr, 				/* tp_repr */
	&string_as_number,			/* tp_as_number */
	&string_as_sequence,			/* tp_as_sequence */
	&string_as_mapping,			/* tp_as_mapping */
	(hashfunc)string_hash, 			/* tp_hash */
	0,					/* tp_call */
	string_str,				/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	&string_as_buffer,			/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
		Py_TPFLAGS_BASETYPE,		/* tp_flags */
	string_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	(richcmpfunc)string_richcompare,	/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	string_methods,				/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseString_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	string_new,				/* tp_new */
	PyObject_Del,	                	/* tp_free */
};
4072
4073void
Fred Drakeba096332000-07-09 07:04:36 +00004074PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004075{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004076 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004077 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004078 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004079 if (w == NULL || !PyString_Check(*pv)) {
4080 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004081 *pv = NULL;
4082 return;
4083 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004084 v = string_concat((PyStringObject *) *pv, w);
4085 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004086 *pv = v;
4087}
4088
Guido van Rossum013142a1994-08-30 08:19:36 +00004089void
Fred Drakeba096332000-07-09 07:04:36 +00004090PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004091{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004092 PyString_Concat(pv, w);
4093 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004094}
4095
4096
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004097/* The following function breaks the notion that strings are immutable:
4098 it changes the size of a string. We get away with this only if there
4099 is only one module referencing the object. You can also think of it
4100 as creating a new string object and destroying the old one, only
4101 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004102 already be known to some other part of the code...
4103 Note that if there's not enough memory to resize the string, the original
4104 string object at *pv is deallocated, *pv is set to NULL, an "out of
4105 memory" exception is set, and -1 is returned. Else (on success) 0 is
4106 returned, and the value in *pv may or may not be the same as on input.
4107 As always, an extra byte is allocated for a trailing \0 byte (newsize
4108 does *not* include that), and a trailing \0 byte is stored.
4109*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004110
4111int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004112_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004113{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004114 register PyObject *v;
4115 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004116 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004117 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4118 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004119 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004120 Py_DECREF(v);
4121 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004122 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004123 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004124 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004125 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004126 _Py_ForgetReference(v);
4127 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004128 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004129 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004130 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004131 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004132 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004133 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004134 _Py_NewReference(*pv);
4135 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004136 sv->ob_size = newsize;
4137 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004138 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004139 return 0;
4140}
Guido van Rossume5372401993-03-16 12:15:04 +00004141
4142/* Helpers for formatstring */
4143
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004144Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004145getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004146{
Thomas Wouters977485d2006-02-16 15:59:12 +00004147 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004148 if (argidx < arglen) {
4149 (*p_argidx)++;
4150 if (arglen < 0)
4151 return args;
4152 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004153 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004154 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004155 PyErr_SetString(PyExc_TypeError,
4156 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004157 return NULL;
4158}
4159
/* Format codes: bit flags accumulated while parsing a format specifier.
 * Each flag corresponds to the flag character shown beside it.
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
4172
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004173Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004174formatfloat(char *buf, size_t buflen, int flags,
4175 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004176{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004177 /* fmt = '%#.' + `prec` + `type`
4178 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004179 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004180 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004181 x = PyFloat_AsDouble(v);
4182 if (x == -1.0 && PyErr_Occurred()) {
4183 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004184 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004185 }
Guido van Rossume5372401993-03-16 12:15:04 +00004186 if (prec < 0)
4187 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004188 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4189 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004190 /* Worst case length calc to ensure no buffer overrun:
4191
4192 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004193 fmt = %#.<prec>g
4194 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004195 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004196 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004197
4198 'f' formats:
4199 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4200 len = 1 + 50 + 1 + prec = 52 + prec
4201
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004202 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004203 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004204
4205 */
Georg Brandlc5db9232007-07-12 08:38:04 +00004206 if (((type == 'g' || type == 'G') &&
4207 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004208 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004209 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004210 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004211 return -1;
4212 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004213 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4214 (flags&F_ALT) ? "#" : "",
4215 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004216 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004217 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004218}
4219
Tim Peters38fd5b62000-09-21 05:43:11 +00004220/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4221 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4222 * Python's regular ints.
4223 * Return value: a new PyString*, or NULL if error.
4224 * . *pbuf is set to point into it,
4225 * *plen set to the # of chars following that.
4226 * Caller must decref it when done using pbuf.
4227 * The string starting at *pbuf is of the form
4228 * "-"? ("0x" | "0X")? digit+
4229 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004230 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004231 * There will be at least prec digits, zero-filled on the left if
4232 * necessary to get that many.
4233 * val object to be converted
4234 * flags bitmask of format flags; only F_ALT is looked at
4235 * prec minimum number of digits; 0-fill on left if needed
4236 * type a character in [duoxX]; u acts the same as d
4237 *
4238 * CAUTION: o, x and X conversions on regular ints can never
4239 * produce a '-' sign, but can for Python's unbounded ints.
4240 */
4241PyObject*
4242_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4243 char **pbuf, int *plen)
4244{
4245 PyObject *result = NULL;
4246 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004247 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004248 int sign; /* 1 if '-', else 0 */
4249 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004250 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004251 int numdigits; /* len == numnondigits + numdigits */
4252 int numnondigits = 0;
4253
4254 switch (type) {
4255 case 'd':
4256 case 'u':
4257 result = val->ob_type->tp_str(val);
4258 break;
4259 case 'o':
4260 result = val->ob_type->tp_as_number->nb_oct(val);
4261 break;
4262 case 'x':
4263 case 'X':
4264 numnondigits = 2;
4265 result = val->ob_type->tp_as_number->nb_hex(val);
4266 break;
4267 default:
4268 assert(!"'type' not in [duoxX]");
4269 }
4270 if (!result)
4271 return NULL;
4272
Neal Norwitz56423e52006-08-13 18:11:08 +00004273 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004274 if (!buf) {
4275 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004276 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004277 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004278
Tim Peters38fd5b62000-09-21 05:43:11 +00004279 /* To modify the string in-place, there can only be one reference. */
4280 if (result->ob_refcnt != 1) {
4281 PyErr_BadInternalCall();
4282 return NULL;
4283 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004284 llen = PyString_Size(result);
Armin Rigo4b63c212006-10-04 11:44:06 +00004285 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004286 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4287 return NULL;
4288 }
4289 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004290 if (buf[len-1] == 'L') {
4291 --len;
4292 buf[len] = '\0';
4293 }
4294 sign = buf[0] == '-';
4295 numnondigits += sign;
4296 numdigits = len - numnondigits;
4297 assert(numdigits > 0);
4298
Tim Petersfff53252001-04-12 18:38:48 +00004299 /* Get rid of base marker unless F_ALT */
4300 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004301 /* Need to skip 0x, 0X or 0. */
4302 int skipped = 0;
4303 switch (type) {
4304 case 'o':
4305 assert(buf[sign] == '0');
4306 /* If 0 is only digit, leave it alone. */
4307 if (numdigits > 1) {
4308 skipped = 1;
4309 --numdigits;
4310 }
4311 break;
4312 case 'x':
4313 case 'X':
4314 assert(buf[sign] == '0');
4315 assert(buf[sign + 1] == 'x');
4316 skipped = 2;
4317 numnondigits -= 2;
4318 break;
4319 }
4320 if (skipped) {
4321 buf += skipped;
4322 len -= skipped;
4323 if (sign)
4324 buf[0] = '-';
4325 }
4326 assert(len == numnondigits + numdigits);
4327 assert(numdigits > 0);
4328 }
4329
4330 /* Fill with leading zeroes to meet minimum width. */
4331 if (prec > numdigits) {
4332 PyObject *r1 = PyString_FromStringAndSize(NULL,
4333 numnondigits + prec);
4334 char *b1;
4335 if (!r1) {
4336 Py_DECREF(result);
4337 return NULL;
4338 }
4339 b1 = PyString_AS_STRING(r1);
4340 for (i = 0; i < numnondigits; ++i)
4341 *b1++ = *buf++;
4342 for (i = 0; i < prec - numdigits; i++)
4343 *b1++ = '0';
4344 for (i = 0; i < numdigits; i++)
4345 *b1++ = *buf++;
4346 *b1 = '\0';
4347 Py_DECREF(result);
4348 result = r1;
4349 buf = PyString_AS_STRING(result);
4350 len = numnondigits + prec;
4351 }
4352
4353 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004354 if (type == 'X') {
4355 /* Need to convert all lower case letters to upper case.
4356 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004357 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004358 if (buf[i] >= 'a' && buf[i] <= 'x')
4359 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004360 }
4361 *pbuf = buf;
4362 *plen = len;
4363 return result;
4364}
4365
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004366Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004367formatint(char *buf, size_t buflen, int flags,
4368 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004369{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004370 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004371 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4372 + 1 + 1 = 24 */
4373 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004374 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004375 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004376
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004377 x = PyInt_AsLong(v);
4378 if (x == -1 && PyErr_Occurred()) {
4379 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004380 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004381 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004382 if (x < 0 && type == 'u') {
4383 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004384 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004385 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4386 sign = "-";
4387 else
4388 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004389 if (prec < 0)
4390 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004391
4392 if ((flags & F_ALT) &&
4393 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004394 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004395 * of issues that cause pain:
4396 * - when 0 is being converted, the C standard leaves off
4397 * the '0x' or '0X', which is inconsistent with other
4398 * %#x/%#X conversions and inconsistent with Python's
4399 * hex() function
4400 * - there are platforms that violate the standard and
4401 * convert 0 with the '0x' or '0X'
4402 * (Metrowerks, Compaq Tru64)
4403 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004404 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004405 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004406 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004407 * We can achieve the desired consistency by inserting our
4408 * own '0x' or '0X' prefix, and substituting %x/%X in place
4409 * of %#x/%#X.
4410 *
4411 * Note that this is the same approach as used in
4412 * formatint() in unicodeobject.c
4413 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004414 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4415 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004416 }
4417 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004418 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4419 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004420 prec, type);
4421 }
4422
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004423 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4424 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004425 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004426 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004427 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004428 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004429 return -1;
4430 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004431 if (sign[0])
4432 PyOS_snprintf(buf, buflen, fmt, -x);
4433 else
4434 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004435 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004436}
4437
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004438Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004439formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004440{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004441 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004442 if (PyString_Check(v)) {
4443 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004444 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004445 }
4446 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004447 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004448 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004449 }
4450 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004451 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004452}
4453
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size, in bytes, of the fixed buffer in which
   floats, ints and chars are formatted.  XXX This is a magic number.
   Each formatting routine does bounds checking to ensure no overflow,
   but a better solution may be to malloc a buffer of appropriate size
   for each format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004463
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004464PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004465PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004466{
4467 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004468 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004469 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004470 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004471 PyObject *result, *orig_args;
4472#ifdef Py_USING_UNICODE
4473 PyObject *v, *w;
4474#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004475 PyObject *dict = NULL;
4476 if (format == NULL || !PyString_Check(format) || args == NULL) {
4477 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004478 return NULL;
4479 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004480 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004481 fmt = PyString_AS_STRING(format);
4482 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004483 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004484 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004485 if (result == NULL)
4486 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004487 res = PyString_AsString(result);
4488 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004489 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004490 argidx = 0;
4491 }
4492 else {
4493 arglen = -1;
4494 argidx = -2;
4495 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004496 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4497 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004498 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004499 while (--fmtcnt >= 0) {
4500 if (*fmt != '%') {
4501 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004502 rescnt = fmtcnt + 100;
4503 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004504 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004505 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004506 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004507 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004508 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004509 }
4510 *res++ = *fmt++;
4511 }
4512 else {
4513 /* Got a format specifier */
4514 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004515 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004516 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004517 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004518 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004519 PyObject *v = NULL;
4520 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004521 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004522 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004523 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004524 char formatbuf[FORMATBUFLEN];
4525 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004526#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004527 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004528 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004529#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004530
Guido van Rossumda9c2711996-12-05 21:58:58 +00004531 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004532 if (*fmt == '(') {
4533 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004534 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004535 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004536 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004537
4538 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004539 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004540 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004541 goto error;
4542 }
4543 ++fmt;
4544 --fmtcnt;
4545 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004546 /* Skip over balanced parentheses */
4547 while (pcount > 0 && --fmtcnt >= 0) {
4548 if (*fmt == ')')
4549 --pcount;
4550 else if (*fmt == '(')
4551 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004552 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004553 }
4554 keylen = fmt - keystart - 1;
4555 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004556 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004557 "incomplete format key");
4558 goto error;
4559 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004560 key = PyString_FromStringAndSize(keystart,
4561 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004562 if (key == NULL)
4563 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004564 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004565 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004566 args_owned = 0;
4567 }
4568 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004569 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004570 if (args == NULL) {
4571 goto error;
4572 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004573 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004574 arglen = -1;
4575 argidx = -2;
4576 }
Guido van Rossume5372401993-03-16 12:15:04 +00004577 while (--fmtcnt >= 0) {
4578 switch (c = *fmt++) {
4579 case '-': flags |= F_LJUST; continue;
4580 case '+': flags |= F_SIGN; continue;
4581 case ' ': flags |= F_BLANK; continue;
4582 case '#': flags |= F_ALT; continue;
4583 case '0': flags |= F_ZERO; continue;
4584 }
4585 break;
4586 }
4587 if (c == '*') {
4588 v = getnextarg(args, arglen, &argidx);
4589 if (v == NULL)
4590 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004591 if (!PyInt_Check(v)) {
4592 PyErr_SetString(PyExc_TypeError,
4593 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004594 goto error;
4595 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004596 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004597 if (width < 0) {
4598 flags |= F_LJUST;
4599 width = -width;
4600 }
Guido van Rossume5372401993-03-16 12:15:04 +00004601 if (--fmtcnt >= 0)
4602 c = *fmt++;
4603 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004604 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004605 width = c - '0';
4606 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004607 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004608 if (!isdigit(c))
4609 break;
4610 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004611 PyErr_SetString(
4612 PyExc_ValueError,
4613 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004614 goto error;
4615 }
4616 width = width*10 + (c - '0');
4617 }
4618 }
4619 if (c == '.') {
4620 prec = 0;
4621 if (--fmtcnt >= 0)
4622 c = *fmt++;
4623 if (c == '*') {
4624 v = getnextarg(args, arglen, &argidx);
4625 if (v == NULL)
4626 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004627 if (!PyInt_Check(v)) {
4628 PyErr_SetString(
4629 PyExc_TypeError,
4630 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004631 goto error;
4632 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004633 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004634 if (prec < 0)
4635 prec = 0;
4636 if (--fmtcnt >= 0)
4637 c = *fmt++;
4638 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004639 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004640 prec = c - '0';
4641 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004642 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004643 if (!isdigit(c))
4644 break;
4645 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004646 PyErr_SetString(
4647 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004648 "prec too big");
4649 goto error;
4650 }
4651 prec = prec*10 + (c - '0');
4652 }
4653 }
4654 } /* prec */
4655 if (fmtcnt >= 0) {
4656 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004657 if (--fmtcnt >= 0)
4658 c = *fmt++;
4659 }
4660 }
4661 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004662 PyErr_SetString(PyExc_ValueError,
4663 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004664 goto error;
4665 }
4666 if (c != '%') {
4667 v = getnextarg(args, arglen, &argidx);
4668 if (v == NULL)
4669 goto error;
4670 }
4671 sign = 0;
4672 fill = ' ';
4673 switch (c) {
4674 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004675 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004676 len = 1;
4677 break;
4678 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004679#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004680 if (PyUnicode_Check(v)) {
4681 fmt = fmt_start;
4682 argidx = argidx_start;
4683 goto unicode;
4684 }
Georg Brandld45014b2005-10-01 17:06:00 +00004685#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004686 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004687#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004688 if (temp != NULL && PyUnicode_Check(temp)) {
4689 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004690 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004691 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004692 goto unicode;
4693 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004694#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004695 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004696 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004697 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004698 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004699 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004700 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004701 if (!PyString_Check(temp)) {
4702 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004703 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004704 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004705 goto error;
4706 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004707 pbuf = PyString_AS_STRING(temp);
4708 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004709 if (prec >= 0 && len > prec)
4710 len = prec;
4711 break;
4712 case 'i':
4713 case 'd':
4714 case 'u':
4715 case 'o':
4716 case 'x':
4717 case 'X':
4718 if (c == 'i')
4719 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004720 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004721 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004722 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004723 prec, c, &pbuf, &ilen);
4724 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004725 if (!temp)
4726 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004727 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004728 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004729 else {
4730 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004731 len = formatint(pbuf,
4732 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004733 flags, prec, c, v);
4734 if (len < 0)
4735 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004736 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004737 }
4738 if (flags & F_ZERO)
4739 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004740 break;
4741 case 'e':
4742 case 'E':
4743 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004744 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004745 case 'g':
4746 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004747 if (c == 'F')
4748 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004749 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004750 len = formatfloat(pbuf, sizeof(formatbuf),
4751 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004752 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004753 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004754 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004755 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004756 fill = '0';
4757 break;
4758 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004759#ifdef Py_USING_UNICODE
4760 if (PyUnicode_Check(v)) {
4761 fmt = fmt_start;
4762 argidx = argidx_start;
4763 goto unicode;
4764 }
4765#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004766 pbuf = formatbuf;
4767 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004768 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004769 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004770 break;
4771 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004772 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004773 "unsupported format character '%c' (0x%x) "
Armin Rigo4b63c212006-10-04 11:44:06 +00004774 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004775 c, c,
Armin Rigo4b63c212006-10-04 11:44:06 +00004776 (Py_ssize_t)(fmt - 1 -
4777 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004778 goto error;
4779 }
4780 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004781 if (*pbuf == '-' || *pbuf == '+') {
4782 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004783 len--;
4784 }
4785 else if (flags & F_SIGN)
4786 sign = '+';
4787 else if (flags & F_BLANK)
4788 sign = ' ';
4789 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004790 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004791 }
4792 if (width < len)
4793 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004794 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004795 reslen -= rescnt;
4796 rescnt = width + fmtcnt + 100;
4797 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004798 if (reslen < 0) {
4799 Py_DECREF(result);
Georg Brandl5f795862007-02-26 13:51:34 +00004800 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004801 return PyErr_NoMemory();
4802 }
Georg Brandl5f795862007-02-26 13:51:34 +00004803 if (_PyString_Resize(&result, reslen) < 0) {
4804 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004805 return NULL;
Georg Brandl5f795862007-02-26 13:51:34 +00004806 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004807 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004808 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004809 }
4810 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004811 if (fill != ' ')
4812 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004813 rescnt--;
4814 if (width > len)
4815 width--;
4816 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004817 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4818 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004819 assert(pbuf[1] == c);
4820 if (fill != ' ') {
4821 *res++ = *pbuf++;
4822 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004823 }
Tim Petersfff53252001-04-12 18:38:48 +00004824 rescnt -= 2;
4825 width -= 2;
4826 if (width < 0)
4827 width = 0;
4828 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004829 }
4830 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004831 do {
4832 --rescnt;
4833 *res++ = fill;
4834 } while (--width > len);
4835 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004836 if (fill == ' ') {
4837 if (sign)
4838 *res++ = sign;
4839 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004840 (c == 'x' || c == 'X')) {
4841 assert(pbuf[0] == '0');
4842 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004843 *res++ = *pbuf++;
4844 *res++ = *pbuf++;
4845 }
4846 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004847 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004848 res += len;
4849 rescnt -= len;
4850 while (--width >= len) {
4851 --rescnt;
4852 *res++ = ' ';
4853 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004854 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004855 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004856 "not all arguments converted during string formatting");
Georg Brandl5f795862007-02-26 13:51:34 +00004857 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004858 goto error;
4859 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004860 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004861 } /* '%' */
4862 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004863 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004864 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004865 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004866 goto error;
4867 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004868 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004869 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004870 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004871 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004872 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004873
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004874#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004875 unicode:
4876 if (args_owned) {
4877 Py_DECREF(args);
4878 args_owned = 0;
4879 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004880 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004881 if (PyTuple_Check(orig_args) && argidx > 0) {
4882 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004883 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004884 v = PyTuple_New(n);
4885 if (v == NULL)
4886 goto error;
4887 while (--n >= 0) {
4888 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4889 Py_INCREF(w);
4890 PyTuple_SET_ITEM(v, n, w);
4891 }
4892 args = v;
4893 } else {
4894 Py_INCREF(orig_args);
4895 args = orig_args;
4896 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004897 args_owned = 1;
4898 /* Take what we have of the result and let the Unicode formatting
4899 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004900 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004901 if (_PyString_Resize(&result, rescnt))
4902 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004903 fmtcnt = PyString_GET_SIZE(format) - \
4904 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004905 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4906 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004907 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004908 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004909 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004910 if (v == NULL)
4911 goto error;
4912 /* Paste what we have (result) to what the Unicode formatting
4913 function returned (v) and return the result (or error) */
4914 w = PyUnicode_Concat(result, v);
4915 Py_DECREF(result);
4916 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004917 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004918 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004919#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004920
Guido van Rossume5372401993-03-16 12:15:04 +00004921 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004922 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004923 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004924 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004925 }
Guido van Rossume5372401993-03-16 12:15:04 +00004926 return NULL;
4927}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004928
Guido van Rossum2a61e741997-01-18 07:55:05 +00004929void
Fred Drakeba096332000-07-09 07:04:36 +00004930PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004931{
4932 register PyStringObject *s = (PyStringObject *)(*p);
4933 PyObject *t;
4934 if (s == NULL || !PyString_Check(s))
4935 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004936 /* If it's a string subclass, we don't really know what putting
4937 it in the interned dict might do. */
4938 if (!PyString_CheckExact(s))
4939 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004940 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004941 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004942 if (interned == NULL) {
4943 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004944 if (interned == NULL) {
4945 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004946 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004947 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004948 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004949 t = PyDict_GetItem(interned, (PyObject *)s);
4950 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004951 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004952 Py_DECREF(*p);
4953 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004954 return;
4955 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004956
Armin Rigo79f7ad22004-08-07 19:27:39 +00004957 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004958 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004959 return;
4960 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004961 /* The two references in interned are not counted by refcnt.
4962 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004963 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004964 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004965}
4966
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004967void
4968PyString_InternImmortal(PyObject **p)
4969{
4970 PyString_InternInPlace(p);
4971 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4972 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4973 Py_INCREF(*p);
4974 }
4975}
4976
Guido van Rossum2a61e741997-01-18 07:55:05 +00004977
4978PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004979PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004980{
4981 PyObject *s = PyString_FromString(cp);
4982 if (s == NULL)
4983 return NULL;
4984 PyString_InternInPlace(&s);
4985 return s;
4986}
4987
Guido van Rossum8cf04761997-08-02 02:57:45 +00004988void
Fred Drakeba096332000-07-09 07:04:36 +00004989PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004990{
4991 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004992 for (i = 0; i < UCHAR_MAX + 1; i++) {
4993 Py_XDECREF(characters[i]);
4994 characters[i] = NULL;
4995 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004996 Py_XDECREF(nullstring);
4997 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004998}
Barry Warsawa903ad982001-02-23 16:40:48 +00004999
Barry Warsawa903ad982001-02-23 16:40:48 +00005000void _Py_ReleaseInternedStrings(void)
5001{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005002 PyObject *keys;
5003 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005004 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005005
5006 if (interned == NULL || !PyDict_Check(interned))
5007 return;
5008 keys = PyDict_Keys(interned);
5009 if (keys == NULL || !PyList_Check(keys)) {
5010 PyErr_Clear();
5011 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005012 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005013
5014 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5015 detector, interned strings are not forcibly deallocated; rather, we
5016 give them their stolen references back, and then clear and DECREF
5017 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005018
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005019 fprintf(stderr, "releasing interned strings\n");
5020 n = PyList_GET_SIZE(keys);
5021 for (i = 0; i < n; i++) {
5022 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5023 switch (s->ob_sstate) {
5024 case SSTATE_NOT_INTERNED:
5025 /* XXX Shouldn't happen */
5026 break;
5027 case SSTATE_INTERNED_IMMORTAL:
5028 s->ob_refcnt += 1;
5029 break;
5030 case SSTATE_INTERNED_MORTAL:
5031 s->ob_refcnt += 2;
5032 break;
5033 default:
5034 Py_FatalError("Inconsistent interned string state.");
5035 }
5036 s->ob_sstate = SSTATE_NOT_INTERNED;
5037 }
5038 Py_DECREF(keys);
5039 PyDict_Clear(interned);
5040 Py_DECREF(interned);
5041 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005042}