blob: 68bf70370a0575df94c39cbcde87b8b2d211b77d [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003#define PY_SSIZE_T_CLEAN
Thomas Wouters477c8d52006-05-27 19:21:47 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00009#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
Guido van Rossumc0b618a1997-05-02 03:12:38 +000013static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000014static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
24static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Thomas Wouters4d70c3d2006-06-08 14:42:34 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Thomas Wouters477c8d52006-05-27 19:21:47 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Thomas Wouters477c8d52006-05-27 19:21:47 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000274 else if (size_tflag)
Thomas Wouters477c8d52006-05-27 19:21:47 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000288 else
Thomas Wouters477c8d52006-05-27 19:21:47 +0000289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
Barry Warsawdadace02001-08-24 18:32:06 +0000291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000536 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000627 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000651 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000652 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000653 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000671 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 "Unicode escapes not legal "
673 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000674 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000675 }
676#endif
677 default:
678 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000682 }
683 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000684 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000685 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
/* -------------------------------------------------------------------- */
/* object api */
694
Martin v. Löwis18e16552006-02-15 17:27:45 +0000695static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000696string_getsize(register PyObject *op)
697{
698 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000699 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000709 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
Martin v. Löwis18e16552006-02-15 17:27:45 +0000715Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000716PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000717{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718 if (!PyString_Check(op))
719 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000720 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000721}
722
723/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000724PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000726 if (!PyString_Check(op))
727 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000728 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000729}
730
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000734 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000742#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000748 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000749#endif
750 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
/* -------------------------------------------------------------------- */
/* Methods */
771
Thomas Wouters477c8d52006-05-27 19:21:47 +0000772#define STRINGLIB_CHAR char
773
774#define STRINGLIB_CMP memcmp
775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
778
779#define STRINGLIB_EMPTY nullstring
780
781#include "stringlib/fastsearch.h"
782
783#include "stringlib/count.h"
784#include "stringlib/find.h"
785#include "stringlib/partition.h"
786
787
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000788static int
Fred Drakeba096332000-07-09 07:04:36 +0000789string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000790{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000791 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000793 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000794
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000795 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000806 if (flags & Py_PRINT_RAW) {
Thomas Wouters89f507f2006-12-13 04:49:30 +0000807 char *data = op->ob_sval;
808 Py_ssize_t size = op->ob_size;
809 while (size > INT_MAX) {
810 /* Very long strings cannot be written atomically.
811 * But don't write exactly INT_MAX bytes at a time
812 * to avoid memory aligment issues.
813 */
814 const int chunk_size = INT_MAX & ~0x3FFF;
815 fwrite(data, 1, chunk_size, fp);
816 data += chunk_size;
817 size -= chunk_size;
818 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000819#ifdef __VMS
Thomas Wouters89f507f2006-12-13 04:49:30 +0000820 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000821#else
Thomas Wouters89f507f2006-12-13 04:49:30 +0000822 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000823#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000824 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000826
Thomas Wouters7e474022000-07-16 12:04:32 +0000827 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000828 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000829 if (memchr(op->ob_sval, '\'', op->ob_size) &&
830 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000831 quote = '"';
832
833 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000834 for (i = 0; i < op->ob_size; i++) {
835 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000837 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000838 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000839 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000840 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000841 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000842 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000843 fprintf(fp, "\\r");
844 else if (c < ' ' || c >= 0x7f)
845 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000846 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000847 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000848 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000849 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000850 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000851}
852
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000853PyObject *
854PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000855{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000856 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000857 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000858 PyObject *v;
Thomas Wouters89f507f2006-12-13 04:49:30 +0000859 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != op->ob_size) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000860 PyErr_SetString(PyExc_OverflowError,
861 "string is too large to make repr");
862 }
863 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000864 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000865 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000866 }
867 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000868 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 register char c;
870 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000871 int quote;
872
Thomas Wouters7e474022000-07-16 12:04:32 +0000873 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000874 quote = '\'';
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000875 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000876 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000877 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000878 quote = '"';
879
Tim Peters9161c8b2001-12-03 01:55:38 +0000880 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000881 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000882 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000883 /* There's at least enough room for a hex escape
884 and a closing quote. */
885 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000886 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000887 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000888 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000889 else if (c == '\t')
890 *p++ = '\\', *p++ = 't';
891 else if (c == '\n')
892 *p++ = '\\', *p++ = 'n';
893 else if (c == '\r')
894 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000895 else if (c < ' ' || c >= 0x7f) {
896 /* For performance, we don't want to call
897 PyOS_snprintf here (extra layers of
898 function call). */
899 sprintf(p, "\\x%02x", c & 0xff);
900 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000901 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000902 else
903 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000904 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000905 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000906 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000907 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000908 _PyString_Resize(
Thomas Woutersd4ec0c32006-04-21 16:44:05 +0000909 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000910 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000911 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912}
913
Guido van Rossum189f1df2001-05-01 16:51:53 +0000914static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000915string_repr(PyObject *op)
916{
917 return PyString_Repr(op, 1);
918}
919
920static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000921string_str(PyObject *s)
922{
Tim Petersc9933152001-10-16 20:18:24 +0000923 assert(PyString_Check(s));
924 if (PyString_CheckExact(s)) {
925 Py_INCREF(s);
926 return s;
927 }
928 else {
929 /* Subtype -- return genuine string with the same value. */
930 PyStringObject *t = (PyStringObject *) s;
931 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
932 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000933}
934
Martin v. Löwis18e16552006-02-15 17:27:45 +0000935static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000936string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000937{
938 return a->ob_size;
939}
940
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000942string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
Thomas Wouters477c8d52006-05-27 19:21:47 +0000944 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000945 register PyStringObject *op;
946 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000947#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 if (PyUnicode_Check(bb))
949 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000950#endif
Guido van Rossumad7d8d12007-04-13 01:39:34 +0000951 if (PyBytes_Check(bb))
952 return PyBytes_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000953 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000954 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000955 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000956 return NULL;
957 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000958#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000959 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000960 if ((a->ob_size == 0 || b->ob_size == 0) &&
961 PyString_CheckExact(a) && PyString_CheckExact(b)) {
962 if (a->ob_size == 0) {
963 Py_INCREF(bb);
964 return bb;
965 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000966 Py_INCREF(a);
967 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 }
969 size = a->ob_size + b->ob_size;
Thomas Wouters477c8d52006-05-27 19:21:47 +0000970 if (size < 0) {
971 PyErr_SetString(PyExc_OverflowError,
972 "strings are too large to concat");
973 return NULL;
974 }
975
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000976 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000977 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000978 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000979 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000980 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000981 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000982 op->ob_sstate = SSTATE_NOT_INTERNED;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +0000983 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
984 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000985 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000986 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000987#undef b
988}
989
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000990static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000991string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000993 register Py_ssize_t i;
994 register Py_ssize_t j;
995 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000996 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000997 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000998 if (n < 0)
999 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001000 /* watch out for overflows: the size can overflow int,
1001 * and the # of bytes needed can overflow size_t
1002 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001003 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +00001004 if (n && size / n != a->ob_size) {
1005 PyErr_SetString(PyExc_OverflowError,
1006 "repeated string is too long");
1007 return NULL;
1008 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001009 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001010 Py_INCREF(a);
1011 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001012 }
Tim Peterse7c05322004-06-27 17:24:49 +00001013 nbytes = (size_t)size;
1014 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001015 PyErr_SetString(PyExc_OverflowError,
1016 "repeated string is too long");
1017 return NULL;
1018 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001020 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001021 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001022 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001023 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001024 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001025 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001026 op->ob_sval[size] = '\0';
1027 if (a->ob_size == 1 && n > 0) {
1028 memset(op->ob_sval, a->ob_sval[0] , n);
1029 return (PyObject *) op;
1030 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001031 i = 0;
1032 if (i < size) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001033 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001034 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +00001035 }
1036 while (i < size) {
1037 j = (i <= size-i) ? i : size-i;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001038 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001039 i += j;
1040 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001041 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001042}
1043
1044/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1045
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001046static PyObject *
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001047string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001048 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001049 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001050{
1051 if (i < 0)
1052 i = 0;
1053 if (j < 0)
1054 j = 0; /* Avoid signed/unsigned bug in next line */
1055 if (j > a->ob_size)
1056 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001057 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1058 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001059 Py_INCREF(a);
1060 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001061 }
1062 if (j < i)
1063 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001064 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001065}
1066
Guido van Rossum9284a572000-03-07 15:53:43 +00001067static int
Thomas Wouters477c8d52006-05-27 19:21:47 +00001068string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001069{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001070 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001071#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00001072 if (PyUnicode_Check(sub_obj))
1073 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001074#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00001075 if (!PyString_Check(sub_obj)) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001076 PyErr_Format(PyExc_TypeError,
1077 "'in <string>' requires string as left operand, "
1078 "not %.200s", sub_obj->ob_type->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001079 return -1;
1080 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001081 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001082
Thomas Wouters477c8d52006-05-27 19:21:47 +00001083 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001084}
1085
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001086static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001087string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001088{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001089 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001090 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001091 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001092 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001093 return NULL;
1094 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001095 pchar = a->ob_sval[i];
1096 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001097 if (v == NULL)
Thomas Wouters477c8d52006-05-27 19:21:47 +00001098 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001099 else {
1100#ifdef COUNT_ALLOCS
1101 one_strings++;
1102#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001103 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001104 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001105 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001106}
1107
Martin v. Löwiscd353062001-05-24 16:56:35 +00001108static PyObject*
1109string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001110{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001111 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001112 Py_ssize_t len_a, len_b;
1113 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001114 PyObject *result;
1115
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001116 /* Make sure both arguments are strings. */
1117 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001118 result = Py_NotImplemented;
1119 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001120 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001121 if (a == b) {
1122 switch (op) {
1123 case Py_EQ:case Py_LE:case Py_GE:
1124 result = Py_True;
1125 goto out;
1126 case Py_NE:case Py_LT:case Py_GT:
1127 result = Py_False;
1128 goto out;
1129 }
1130 }
1131 if (op == Py_EQ) {
1132 /* Supporting Py_NE here as well does not save
1133 much time, since Py_NE is rarely used. */
1134 if (a->ob_size == b->ob_size
1135 && (a->ob_sval[0] == b->ob_sval[0]
Thomas Wouters27d517b2007-02-25 20:39:11 +00001136 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001137 result = Py_True;
1138 } else {
1139 result = Py_False;
1140 }
1141 goto out;
1142 }
1143 len_a = a->ob_size; len_b = b->ob_size;
1144 min_len = (len_a < len_b) ? len_a : len_b;
1145 if (min_len > 0) {
1146 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1147 if (c==0)
1148 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Thomas Wouters27d517b2007-02-25 20:39:11 +00001149 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001150 c = 0;
1151 if (c == 0)
1152 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1153 switch (op) {
1154 case Py_LT: c = c < 0; break;
1155 case Py_LE: c = c <= 0; break;
1156 case Py_EQ: assert(0); break; /* unreachable */
1157 case Py_NE: c = c != 0; break;
1158 case Py_GT: c = c > 0; break;
1159 case Py_GE: c = c >= 0; break;
1160 default:
1161 result = Py_NotImplemented;
1162 goto out;
1163 }
1164 result = c ? Py_True : Py_False;
1165 out:
1166 Py_INCREF(result);
1167 return result;
1168}
1169
1170int
1171_PyString_Eq(PyObject *o1, PyObject *o2)
1172{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001173 PyStringObject *a = (PyStringObject*) o1;
1174 PyStringObject *b = (PyStringObject*) o2;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001175 return a->ob_size == b->ob_size
1176 && *a->ob_sval == *b->ob_sval
1177 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001178}
1179
Guido van Rossum9bfef441993-03-29 10:43:31 +00001180static long
Fred Drakeba096332000-07-09 07:04:36 +00001181string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001182{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001183 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001184 register unsigned char *p;
1185 register long x;
1186
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001187 if (a->ob_shash != -1)
1188 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001189 len = a->ob_size;
1190 p = (unsigned char *) a->ob_sval;
1191 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001192 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001193 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001194 x ^= a->ob_size;
1195 if (x == -1)
1196 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001197 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001198 return x;
1199}
1200
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001201static PyObject*
1202string_subscript(PyStringObject* self, PyObject* item)
1203{
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00001204 if (PyIndex_Check(item)) {
1205 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001206 if (i == -1 && PyErr_Occurred())
1207 return NULL;
1208 if (i < 0)
1209 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001210 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001211 }
1212 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001213 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001214 char* source_buf;
1215 char* result_buf;
1216 PyObject* result;
1217
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001218 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001219 PyString_GET_SIZE(self),
1220 &start, &stop, &step, &slicelength) < 0) {
1221 return NULL;
1222 }
1223
1224 if (slicelength <= 0) {
1225 return PyString_FromStringAndSize("", 0);
1226 }
1227 else {
1228 source_buf = PyString_AsString((PyObject*)self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001229 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001230 if (result_buf == NULL)
1231 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001232
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001233 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001234 cur += step, i++) {
1235 result_buf[i] = source_buf[cur];
1236 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001237
1238 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001239 slicelength);
1240 PyMem_Free(result_buf);
1241 return result;
1242 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001243 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001244 else {
Thomas Wouters89f507f2006-12-13 04:49:30 +00001245 PyErr_Format(PyExc_TypeError,
1246 "string indices must be integers, not %.200s",
1247 item->ob_type->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001248 return NULL;
1249 }
1250}
1251
Martin v. Löwis18e16552006-02-15 17:27:45 +00001252static Py_ssize_t
1253string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001254{
1255 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001256 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001257 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001258 return -1;
1259 }
1260 *ptr = (void *)self->ob_sval;
1261 return self->ob_size;
1262}
1263
Martin v. Löwis18e16552006-02-15 17:27:45 +00001264static Py_ssize_t
1265string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001266{
Guido van Rossum045e6881997-09-08 18:30:11 +00001267 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001268 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001269 return -1;
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( lenp )
1276 *lenp = self->ob_size;
1277 return 1;
1278}
1279
Martin v. Löwis18e16552006-02-15 17:27:45 +00001280static Py_ssize_t
1281string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001282{
1283 if ( index != 0 ) {
1284 PyErr_SetString(PyExc_SystemError,
1285 "accessing non-existent string segment");
1286 return -1;
1287 }
1288 *ptr = self->ob_sval;
1289 return self->ob_size;
1290}
1291
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001292static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001294 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001295 (ssizeargfunc)string_repeat, /*sq_repeat*/
1296 (ssizeargfunc)string_item, /*sq_item*/
1297 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001298 0, /*sq_ass_item*/
1299 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001300 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001301};
1302
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001303static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001304 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001305 (binaryfunc)string_subscript,
1306 0,
1307};
1308
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001309static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001310 (readbufferproc)string_buffer_getreadbuf,
1311 (writebufferproc)string_buffer_getwritebuf,
1312 (segcountproc)string_buffer_getsegcount,
1313 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001314};
1315
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001316
1317
/* Strip variants; each value is also the index of the matching entry in
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* The format string for strip type i with its 3-character "|O:" prefix
   skipped, i.e. just the method name. */
#define STRIPNAME(i) (stripformat[(i)]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001326
Thomas Wouters477c8d52006-05-27 19:21:47 +00001327
/* True when the `length` bytes of `target` starting at `offset` equal the
   first `length` bytes of `pattern`.  The first and last bytes are
   compared directly and memcmp() covers the bytes in between.
   Don't call if length < 2.
   Every parameter is parenthesized so that expression arguments expand
   safely; note that arguments are still evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length) \
    ((target)[(offset)] == (pattern)[0] && \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] && \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1333
1334
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits: one more
   than the split count (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter is parenthesized so that expression
   arguments expand safely. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1347
/* Append the substring data[left:right] to `list` as a new string.
   Expands to several statements (not wrapped in a block) and relies on
   the enclosing function providing the locals `str` and `list` and an
   `onError` label, which is jumped to when creating or appending the
   string fails. */
#define SPLIT_APPEND(data, left, right) \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (PyList_Append(list, str) != 0) { \
        Py_DECREF(str); \
        goto onError; \
    } \
    Py_DECREF(str);
1359
/* Add the substring data[left:right] to `list` as a new string and bump
   `count`.  While count is below MAX_PREALLOC the string is stored
   directly into slot `count`; after that it is appended.  Relies on the
   enclosing function providing the locals `str`, `list` and `count` and
   an `onError` label, which is jumped to on failure. */
#define SPLIT_ADD(data, left, right) { \
    str = PyString_FromStringAndSize((data) + (left), \
                                     (right) - (left)); \
    if (str == NULL) \
        goto onError; \
    if (count >= MAX_PREALLOC) { \
        if (PyList_Append(list, str) != 0) { \
            Py_DECREF(str); \
            goto onError; \
        } \
        Py_DECREF(str); \
    } \
    else { \
        PyList_SET_ITEM(list, count, str); \
    } \
    count++; }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001376
/* Always force the list to the expected size: set the list's ob_size to
   the enclosing function's `count`. */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)

/* Scanning helpers.  Each advances (or, for the R* forms, retreats) the
   index variable `i` over `s` while the byte at `i` is / is not
   whitespace according to isspace().  The forward forms stop at `len`;
   the reverse forms stop once `i` drops below 0.  `i` must be a
   modifiable variable.  Parameters are parenthesized so that expression
   arguments for `s` and `len` expand safely. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) &&  isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 &&  isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1384
1385Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001386split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001387{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001388 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001389 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001390 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001391
1392 if (list == NULL)
1393 return NULL;
1394
Thomas Wouters477c8d52006-05-27 19:21:47 +00001395 i = j = 0;
1396
1397 while (maxsplit-- > 0) {
1398 SKIP_SPACE(s, i, len);
1399 if (i==len) break;
1400 j = i; i++;
1401 SKIP_NONSPACE(s, i, len);
1402 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001403 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001404
1405 if (i < len) {
1406 /* Only occurs when maxsplit was reached */
1407 /* Skip any remaining whitespace and copy to end of string */
1408 SKIP_SPACE(s, i, len);
1409 if (i != len)
1410 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001411 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001412 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001414 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 Py_DECREF(list);
1416 return NULL;
1417}
1418
Thomas Wouters477c8d52006-05-27 19:21:47 +00001419Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001420split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001421{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001422 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001423 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001424 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001425
1426 if (list == NULL)
1427 return NULL;
1428
Thomas Wouters477c8d52006-05-27 19:21:47 +00001429 i = j = 0;
1430 while ((j < len) && (maxcount-- > 0)) {
1431 for(; j<len; j++) {
1432 /* I found that using memchr makes no difference */
1433 if (s[j] == ch) {
1434 SPLIT_ADD(s, i, j);
1435 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001436 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001437 }
1438 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001439 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001440 if (i <= len) {
1441 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001443 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001444 return list;
1445
1446 onError:
1447 Py_DECREF(list);
1448 return NULL;
1449}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001451PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001452"S.split([sep [,maxsplit]]) -> list of strings\n\
1453\n\
1454Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001455delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001456splits are done. If sep is not specified or is None, any\n\
1457whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001458
1459static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001460string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001462 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001463 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001464 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001465 PyObject *list, *str, *subobj = Py_None;
1466#ifdef USE_FAST
1467 Py_ssize_t pos;
1468#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001469
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001470 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001471 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001472 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001473 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001474 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001475 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001476 if (PyString_Check(subobj)) {
1477 sub = PyString_AS_STRING(subobj);
1478 n = PyString_GET_SIZE(subobj);
1479 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001480#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001481 else if (PyUnicode_Check(subobj))
1482 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001483#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001484 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1485 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001486
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487 if (n == 0) {
1488 PyErr_SetString(PyExc_ValueError, "empty separator");
1489 return NULL;
1490 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001491 else if (n == 1)
1492 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493
Thomas Wouters477c8d52006-05-27 19:21:47 +00001494 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495 if (list == NULL)
1496 return NULL;
1497
Thomas Wouters477c8d52006-05-27 19:21:47 +00001498#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 i = j = 0;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001500 while (maxsplit-- > 0) {
1501 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1502 if (pos < 0)
1503 break;
1504 j = i+pos;
1505 SPLIT_ADD(s, i, j);
1506 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001508#else
1509 i = j = 0;
1510 while ((j+n <= len) && (maxsplit-- > 0)) {
1511 for (; j+n <= len; j++) {
1512 if (Py_STRING_MATCH(s, j, sub, n)) {
1513 SPLIT_ADD(s, i, j);
1514 i = j = j + n;
1515 break;
1516 }
1517 }
1518 }
1519#endif
1520 SPLIT_ADD(s, i, len);
1521 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 return list;
1523
Thomas Wouters477c8d52006-05-27 19:21:47 +00001524 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001525 Py_DECREF(list);
1526 return NULL;
1527}
1528
Thomas Wouters477c8d52006-05-27 19:21:47 +00001529PyDoc_STRVAR(partition__doc__,
1530"S.partition(sep) -> (head, sep, tail)\n\
1531\n\
1532Searches for the separator sep in S, and returns the part before it,\n\
1533the separator itself, and the part after it. If the separator is not\n\
1534found, returns S and two empty strings.");
1535
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001536static PyObject *
Thomas Wouters477c8d52006-05-27 19:21:47 +00001537string_partition(PyStringObject *self, PyObject *sep_obj)
1538{
1539 const char *sep;
1540 Py_ssize_t sep_len;
1541
1542 if (PyString_Check(sep_obj)) {
1543 sep = PyString_AS_STRING(sep_obj);
1544 sep_len = PyString_GET_SIZE(sep_obj);
1545 }
1546#ifdef Py_USING_UNICODE
1547 else if (PyUnicode_Check(sep_obj))
1548 return PyUnicode_Partition((PyObject *) self, sep_obj);
1549#endif
1550 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1551 return NULL;
1552
1553 return stringlib_partition(
1554 (PyObject*) self,
1555 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1556 sep_obj, sep, sep_len
1557 );
1558}
1559
1560PyDoc_STRVAR(rpartition__doc__,
Thomas Wouters89f507f2006-12-13 04:49:30 +00001561"S.rpartition(sep) -> (tail, sep, head)\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00001562\n\
1563Searches for the separator sep in S, starting at the end of S, and returns\n\
1564the part before it, the separator itself, and the part after it. If the\n\
Thomas Wouters89f507f2006-12-13 04:49:30 +00001565separator is not found, returns two empty strings and S.");
Thomas Wouters477c8d52006-05-27 19:21:47 +00001566
1567static PyObject *
1568string_rpartition(PyStringObject *self, PyObject *sep_obj)
1569{
1570 const char *sep;
1571 Py_ssize_t sep_len;
1572
1573 if (PyString_Check(sep_obj)) {
1574 sep = PyString_AS_STRING(sep_obj);
1575 sep_len = PyString_GET_SIZE(sep_obj);
1576 }
1577#ifdef Py_USING_UNICODE
1578 else if (PyUnicode_Check(sep_obj))
1579 return PyUnicode_Partition((PyObject *) self, sep_obj);
1580#endif
1581 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1582 return NULL;
1583
1584 return stringlib_rpartition(
1585 (PyObject*) self,
1586 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1587 sep_obj, sep, sep_len
1588 );
1589}
1590
1591Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001592rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001593{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001594 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001595 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001596 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001597
1598 if (list == NULL)
1599 return NULL;
1600
Thomas Wouters477c8d52006-05-27 19:21:47 +00001601 i = j = len-1;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001602
Thomas Wouters477c8d52006-05-27 19:21:47 +00001603 while (maxsplit-- > 0) {
1604 RSKIP_SPACE(s, i);
1605 if (i<0) break;
1606 j = i; i--;
1607 RSKIP_NONSPACE(s, i);
1608 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001609 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001610 if (i >= 0) {
1611 /* Only occurs when maxsplit was reached */
1612 /* Skip any remaining whitespace and copy to beginning of string */
1613 RSKIP_SPACE(s, i);
1614 if (i >= 0)
1615 SPLIT_ADD(s, 0, i + 1);
1616
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001617 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001618 FIX_PREALLOC_SIZE(list);
1619 if (PyList_Reverse(list) < 0)
1620 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001621 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001622 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001623 Py_DECREF(list);
1624 return NULL;
1625}
1626
Thomas Wouters477c8d52006-05-27 19:21:47 +00001627Py_LOCAL_INLINE(PyObject *)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001628rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001629{
Thomas Wouters477c8d52006-05-27 19:21:47 +00001630 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 PyObject *str;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001632 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001633
1634 if (list == NULL)
1635 return NULL;
1636
Thomas Wouters477c8d52006-05-27 19:21:47 +00001637 i = j = len - 1;
1638 while ((i >= 0) && (maxcount-- > 0)) {
1639 for (; i >= 0; i--) {
1640 if (s[i] == ch) {
1641 SPLIT_ADD(s, i + 1, j + 1);
1642 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001643 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001644 }
1645 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001646 }
1647 if (j >= -1) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00001648 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001649 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00001650 FIX_PREALLOC_SIZE(list);
1651 if (PyList_Reverse(list) < 0)
1652 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001653 return list;
1654
1655 onError:
1656 Py_DECREF(list);
1657 return NULL;
1658}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001659
1660PyDoc_STRVAR(rsplit__doc__,
1661"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1662\n\
1663Return a list of the words in the string S, using sep as the\n\
1664delimiter string, starting at the end of the string and working\n\
1665to the front. If maxsplit is given, at most maxsplit splits are\n\
1666done. If sep is not specified or is None, any whitespace string\n\
1667is a separator.");
1668
1669static PyObject *
1670string_rsplit(PyStringObject *self, PyObject *args)
1671{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001672 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001673 Py_ssize_t maxsplit = -1, count=0;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001674 const char *s = PyString_AS_STRING(self), *sub;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001675 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001676
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001677 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001678 return NULL;
1679 if (maxsplit < 0)
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001680 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001681 if (subobj == Py_None)
1682 return rsplit_whitespace(s, len, maxsplit);
1683 if (PyString_Check(subobj)) {
1684 sub = PyString_AS_STRING(subobj);
1685 n = PyString_GET_SIZE(subobj);
1686 }
1687#ifdef Py_USING_UNICODE
1688 else if (PyUnicode_Check(subobj))
1689 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1690#endif
1691 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1692 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001693
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001694 if (n == 0) {
1695 PyErr_SetString(PyExc_ValueError, "empty separator");
1696 return NULL;
1697 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001698 else if (n == 1)
1699 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001700
Thomas Wouters477c8d52006-05-27 19:21:47 +00001701 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001702 if (list == NULL)
1703 return NULL;
1704
1705 j = len;
1706 i = j - n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001707
Thomas Wouters477c8d52006-05-27 19:21:47 +00001708 while ( (i >= 0) && (maxsplit-- > 0) ) {
1709 for (; i>=0; i--) {
1710 if (Py_STRING_MATCH(s, i, sub, n)) {
1711 SPLIT_ADD(s, i + n, j);
1712 j = i;
1713 i -= n;
1714 break;
1715 }
1716 }
1717 }
1718 SPLIT_ADD(s, 0, j);
1719 FIX_PREALLOC_SIZE(list);
1720 if (PyList_Reverse(list) < 0)
1721 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001722 return list;
1723
Thomas Wouters477c8d52006-05-27 19:21:47 +00001724onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001725 Py_DECREF(list);
1726 return NULL;
1727}
1728
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001730PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731"S.join(sequence) -> string\n\
1732\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001734sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735
1736static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001737string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738{
1739 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001740 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001743 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001744 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001745 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001746 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747
Tim Peters19fe14e2001-01-19 03:03:47 +00001748 seq = PySequence_Fast(orig, "");
1749 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001750 return NULL;
1751 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001752
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001753 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001754 if (seqlen == 0) {
1755 Py_DECREF(seq);
1756 return PyString_FromString("");
1757 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001759 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001760 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1761 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001762 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001763 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001764 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001766
Raymond Hettinger674f2412004-08-23 23:23:54 +00001767 /* There are at least two things to join, or else we have a subclass
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001768 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001769 * Do a pre-pass to figure out the total amount of space we'll
1770 * need (sz), see whether any argument is absurd, and defer to
1771 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001772 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001773 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001774 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001775 item = PySequence_Fast_GET_ITEM(seq, i);
1776 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001777#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001778 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001779 /* Defer to Unicode join.
1780 * CAUTION: There's no gurantee that the
1781 * original sequence can be iterated over
1782 * again, so we must pass seq here.
1783 */
1784 PyObject *result;
1785 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001786 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001787 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001788 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001789#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001790 PyErr_Format(PyExc_TypeError,
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001791 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001792 " %.80s found",
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001793 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001794 Py_DECREF(seq);
1795 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001796 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001797 sz += PyString_GET_SIZE(item);
1798 if (i != 0)
1799 sz += seplen;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001800 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001801 PyErr_SetString(PyExc_OverflowError,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001802 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001803 Py_DECREF(seq);
1804 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001806 }
1807
1808 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001809 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001810 if (res == NULL) {
1811 Py_DECREF(seq);
1812 return NULL;
1813 }
1814
1815 /* Catenate everything. */
1816 p = PyString_AS_STRING(res);
1817 for (i = 0; i < seqlen; ++i) {
1818 size_t n;
1819 item = PySequence_Fast_GET_ITEM(seq, i);
1820 n = PyString_GET_SIZE(item);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001821 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001822 p += n;
1823 if (i < seqlen - 1) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001824 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001825 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001826 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001828
Jeremy Hylton49048292000-07-11 03:28:17 +00001829 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001830 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831}
1832
Tim Peters52e155e2001-06-16 05:42:57 +00001833PyObject *
1834_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001835{
Tim Petersa7259592001-06-16 05:11:17 +00001836 assert(sep != NULL && PyString_Check(sep));
1837 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001838 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001839}
1840
Thomas Wouters477c8d52006-05-27 19:21:47 +00001841Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001843{
1844 if (*end > len)
1845 *end = len;
1846 else if (*end < 0)
1847 *end += len;
1848 if (*end < 0)
1849 *end = 0;
1850 if (*start < 0)
1851 *start += len;
1852 if (*start < 0)
1853 *start = 0;
1854}
1855
Thomas Wouters477c8d52006-05-27 19:21:47 +00001856Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001857string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001859 PyObject *subobj;
Thomas Wouters477c8d52006-05-27 19:21:47 +00001860 const char *sub;
1861 Py_ssize_t sub_len;
1862 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863
Thomas Wouters477c8d52006-05-27 19:21:47 +00001864 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1865 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001866 return -2;
1867 if (PyString_Check(subobj)) {
1868 sub = PyString_AS_STRING(subobj);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001869 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001871#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001872 else if (PyUnicode_Check(subobj))
Thomas Wouters477c8d52006-05-27 19:21:47 +00001873 return PyUnicode_Find(
1874 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001875#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00001876 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00001877 /* XXX - the "expected a character buffer object" is pretty
1878 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879 return -2;
1880
Thomas Wouters477c8d52006-05-27 19:21:47 +00001881 if (dir > 0)
1882 return stringlib_find_slice(
1883 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1884 sub, sub_len, start, end);
1885 else
1886 return stringlib_rfind_slice(
1887 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1888 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889}
1890
1891
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001892PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001893"S.find(sub [,start [,end]]) -> int\n\
1894\n\
1895Return the lowest index in S where substring sub is found,\n\
1896such that sub is contained within s[start,end]. Optional\n\
1897arguments start and end are interpreted as in slice notation.\n\
1898\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001899Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001900
1901static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001902string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001904 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001905 if (result == -2)
1906 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001907 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908}
1909
1910
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001911PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001912"S.index(sub [,start [,end]]) -> int\n\
1913\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001914Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915
1916static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001917string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001918{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001919 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001920 if (result == -2)
1921 return NULL;
1922 if (result == -1) {
1923 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001924 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001925 return NULL;
1926 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001927 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001928}
1929
1930
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001931PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001932"S.rfind(sub [,start [,end]]) -> int\n\
1933\n\
1934Return the highest index in S where substring sub is found,\n\
1935such that sub is contained within s[start,end]. Optional\n\
1936arguments start and end are interpreted as in slice notation.\n\
1937\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001938Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001939
1940static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001941string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001942{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001943 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001944 if (result == -2)
1945 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001946 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001947}
1948
1949
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001950PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951"S.rindex(sub [,start [,end]]) -> int\n\
1952\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001953Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001954
1955static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001956string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001957{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001958 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959 if (result == -2)
1960 return NULL;
1961 if (result == -1) {
1962 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001963 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001964 return NULL;
1965 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001966 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967}
1968
1969
Thomas Wouters477c8d52006-05-27 19:21:47 +00001970Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001971do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1972{
1973 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001974 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001975 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001976 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1977 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001978
1979 i = 0;
1980 if (striptype != RIGHTSTRIP) {
1981 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1982 i++;
1983 }
1984 }
1985
1986 j = len;
1987 if (striptype != LEFTSTRIP) {
1988 do {
1989 j--;
1990 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1991 j++;
1992 }
1993
1994 if (i == 0 && j == len && PyString_CheckExact(self)) {
1995 Py_INCREF(self);
1996 return (PyObject*)self;
1997 }
1998 else
1999 return PyString_FromStringAndSize(s+i, j-i);
2000}
2001
2002
Thomas Wouters477c8d52006-05-27 19:21:47 +00002003Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002004do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002005{
2006 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002007 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002008
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009 i = 0;
2010 if (striptype != RIGHTSTRIP) {
2011 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2012 i++;
2013 }
2014 }
2015
2016 j = len;
2017 if (striptype != LEFTSTRIP) {
2018 do {
2019 j--;
2020 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2021 j++;
2022 }
2023
Tim Peters8fa5dd02001-09-12 02:18:30 +00002024 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025 Py_INCREF(self);
2026 return (PyObject*)self;
2027 }
2028 else
2029 return PyString_FromStringAndSize(s+i, j-i);
2030}
2031
2032
Thomas Wouters477c8d52006-05-27 19:21:47 +00002033Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002034do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2035{
2036 PyObject *sep = NULL;
2037
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002038 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002039 return NULL;
2040
2041 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002042 if (PyString_Check(sep))
2043 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002044#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002045 else if (PyUnicode_Check(sep)) {
2046 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2047 PyObject *res;
2048 if (uniself==NULL)
2049 return NULL;
2050 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2051 striptype, sep);
2052 Py_DECREF(uniself);
2053 return res;
2054 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002055#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002056 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002057#ifdef Py_USING_UNICODE
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002058 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002059#else
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002060 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002061#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002062 STRIPNAME(striptype));
2063 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002064 }
2065
2066 return do_strip(self, striptype);
2067}
2068
2069
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002070PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002071"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072\n\
2073Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002074whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002075If chars is given and not None, remove characters in chars instead.\n\
2076If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077
2078static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002079string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002080{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002081 if (PyTuple_GET_SIZE(args) == 0)
2082 return do_strip(self, BOTHSTRIP); /* Common case */
2083 else
2084 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002085}
2086
2087
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002088PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002089"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002090\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002091Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002092If chars is given and not None, remove characters in chars instead.\n\
2093If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002094
2095static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002098 if (PyTuple_GET_SIZE(args) == 0)
2099 return do_strip(self, LEFTSTRIP); /* Common case */
2100 else
2101 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102}
2103
2104
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002105PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002106"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002107\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002108Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002109If chars is given and not None, remove characters in chars instead.\n\
2110If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002111
2112static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002113string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002115 if (PyTuple_GET_SIZE(args) == 0)
2116 return do_strip(self, RIGHTSTRIP); /* Common case */
2117 else
2118 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002119}
2120
2121
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002122PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123"S.lower() -> string\n\
2124\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002125Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126
Thomas Wouters477c8d52006-05-27 19:21:47 +00002127/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2128#ifndef _tolower
2129#define _tolower tolower
2130#endif
2131
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002132static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002133string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002135 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002136 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002137 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002139 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002140 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002142
2143 s = PyString_AS_STRING(newobj);
2144
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002145 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002146
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002148 int c = Py_CHARMASK(s[i]);
2149 if (isupper(c))
2150 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002152
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002153 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154}
2155
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002156PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002157"S.upper() -> string\n\
2158\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002159Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160
Thomas Wouters477c8d52006-05-27 19:21:47 +00002161#ifndef _toupper
2162#define _toupper toupper
2163#endif
2164
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002166string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002168 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002169 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002170 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002171
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002172 newobj = PyString_FromStringAndSize(NULL, n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002173 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174 return NULL;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002175
2176 s = PyString_AS_STRING(newobj);
2177
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002178 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002179
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002180 for (i = 0; i < n; i++) {
Thomas Wouters477c8d52006-05-27 19:21:47 +00002181 int c = Py_CHARMASK(s[i]);
2182 if (islower(c))
2183 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002184 }
Thomas Wouters477c8d52006-05-27 19:21:47 +00002185
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002186 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002187}
2188
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002189PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190"S.title() -> string\n\
2191\n\
2192Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002193characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194
2195static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002196string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002197{
2198 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002199 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200 int previous_is_cased = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002201 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002203 newobj = PyString_FromStringAndSize(NULL, n);
2204 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002205 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002206 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002207 for (i = 0; i < n; i++) {
2208 int c = Py_CHARMASK(*s++);
2209 if (islower(c)) {
2210 if (!previous_is_cased)
2211 c = toupper(c);
2212 previous_is_cased = 1;
2213 } else if (isupper(c)) {
2214 if (previous_is_cased)
2215 c = tolower(c);
2216 previous_is_cased = 1;
2217 } else
2218 previous_is_cased = 0;
2219 *s_new++ = c;
2220 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002221 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222}
2223
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002224PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225"S.capitalize() -> string\n\
2226\n\
2227Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002228capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229
2230static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002231string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002232{
2233 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002234 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002235 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002237 newobj = PyString_FromStringAndSize(NULL, n);
2238 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002239 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002240 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002241 if (0 < n) {
2242 int c = Py_CHARMASK(*s++);
2243 if (islower(c))
2244 *s_new = toupper(c);
2245 else
2246 *s_new = c;
2247 s_new++;
2248 }
2249 for (i = 1; i < n; i++) {
2250 int c = Py_CHARMASK(*s++);
2251 if (isupper(c))
2252 *s_new = tolower(c);
2253 else
2254 *s_new = c;
2255 s_new++;
2256 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002257 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258}
2259
2260
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002261PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002262"S.count(sub[, start[, end]]) -> int\n\
2263\n\
Thomas Wouters477c8d52006-05-27 19:21:47 +00002264Return the number of non-overlapping occurrences of substring sub in\n\
2265string S[start:end]. Optional arguments start and end are interpreted\n\
2266as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002267
2268static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002269string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002270{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002271 PyObject *sub_obj;
2272 const char *str = PyString_AS_STRING(self), *sub;
2273 Py_ssize_t sub_len;
2274 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002275
Thomas Wouters477c8d52006-05-27 19:21:47 +00002276 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2277 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002278 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002279
Thomas Wouters477c8d52006-05-27 19:21:47 +00002280 if (PyString_Check(sub_obj)) {
2281 sub = PyString_AS_STRING(sub_obj);
2282 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002283 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002284#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00002285 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002286 Py_ssize_t count;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002287 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002288 if (count == -1)
2289 return NULL;
2290 else
Thomas Wouters477c8d52006-05-27 19:21:47 +00002291 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002292 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002293#endif
Thomas Wouters477c8d52006-05-27 19:21:47 +00002294 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295 return NULL;
2296
Thomas Wouters477c8d52006-05-27 19:21:47 +00002297 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002298
Thomas Wouters477c8d52006-05-27 19:21:47 +00002299 return PyInt_FromSsize_t(
2300 stringlib_count(str + start, end - start, sub, sub_len)
2301 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002302}
2303
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002304PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002305"S.swapcase() -> string\n\
2306\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002308converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002309
2310static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002311string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002312{
2313 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002314 Py_ssize_t i, n = PyString_GET_SIZE(self);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002315 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002316
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002317 newobj = PyString_FromStringAndSize(NULL, n);
2318 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319 return NULL;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002320 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002321 for (i = 0; i < n; i++) {
2322 int c = Py_CHARMASK(*s++);
2323 if (islower(c)) {
2324 *s_new = toupper(c);
2325 }
2326 else if (isupper(c)) {
2327 *s_new = tolower(c);
2328 }
2329 else
2330 *s_new = c;
2331 s_new++;
2332 }
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002333 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002334}
2335
2336
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002337PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002338"S.translate(table [,deletechars]) -> string\n\
2339\n\
2340Return a copy of the string S, where all characters occurring\n\
2341in the optional argument deletechars are removed, and the\n\
2342remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002343translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002344
2345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002346string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002347{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002348 register char *input, *output;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002349 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002350 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002351 PyObject *input_obj = (PyObject*)self;
Guido van Rossumd8faa362007-04-27 19:54:29 +00002352 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002353 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002354 PyObject *result;
2355 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002357
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002358 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002361
2362 if (PyString_Check(tableobj)) {
Guido van Rossumd8faa362007-04-27 19:54:29 +00002363 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002364 tablen = PyString_GET_SIZE(tableobj);
2365 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002366 else if (tableobj == Py_None) {
2367 table = NULL;
2368 tablen = 256;
2369 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002370#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002371 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002372 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002373 parameter; instead a mapping to None will cause characters
2374 to be deleted. */
2375 if (delobj != NULL) {
2376 PyErr_SetString(PyExc_TypeError,
2377 "deletions are implemented differently for unicode");
2378 return NULL;
2379 }
2380 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2381 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002382#endif
Guido van Rossumd8faa362007-04-27 19:54:29 +00002383 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002384 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002385
Martin v. Löwis00b61272002-12-12 20:03:19 +00002386 if (tablen != 256) {
2387 PyErr_SetString(PyExc_ValueError,
2388 "translation table must be 256 characters long");
2389 return NULL;
2390 }
2391
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392 if (delobj != NULL) {
2393 if (PyString_Check(delobj)) {
2394 del_table = PyString_AS_STRING(delobj);
2395 dellen = PyString_GET_SIZE(delobj);
2396 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002397#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398 else if (PyUnicode_Check(delobj)) {
2399 PyErr_SetString(PyExc_TypeError,
2400 "deletions are implemented differently for unicode");
2401 return NULL;
2402 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002403#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002404 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2405 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002406 }
2407 else {
2408 del_table = NULL;
2409 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410 }
2411
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002412 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413 result = PyString_FromStringAndSize((char *)NULL, inlen);
2414 if (result == NULL)
2415 return NULL;
2416 output_start = output = PyString_AsString(result);
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00002417 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002418
Guido van Rossumd8faa362007-04-27 19:54:29 +00002419 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002420 /* If no deletions are required, use faster code */
2421 for (i = inlen; --i >= 0; ) {
2422 c = Py_CHARMASK(*input++);
2423 if (Py_CHARMASK((*output++ = table[c])) != c)
2424 changed = 1;
2425 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002426 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002427 return result;
2428 Py_DECREF(result);
2429 Py_INCREF(input_obj);
2430 return input_obj;
2431 }
2432
Guido van Rossumd8faa362007-04-27 19:54:29 +00002433 if (table == NULL) {
2434 for (i = 0; i < 256; i++)
2435 trans_table[i] = Py_CHARMASK(i);
2436 } else {
2437 for (i = 0; i < 256; i++)
2438 trans_table[i] = Py_CHARMASK(table[i]);
2439 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002440
2441 for (i = 0; i < dellen; i++)
2442 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2443
2444 for (i = inlen; --i >= 0; ) {
2445 c = Py_CHARMASK(*input++);
2446 if (trans_table[c] != -1)
2447 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2448 continue;
2449 changed = 1;
2450 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002451 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002452 Py_DECREF(result);
2453 Py_INCREF(input_obj);
2454 return input_obj;
2455 }
2456 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002457 if (inlen > 0)
2458 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002459 return result;
2460}
2461
2462
/* Search directions */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to c among the first
   target_len bytes of target, or NULL when there is none (thin wrapper
   around memchr).  All arguments are parenthesised so that arbitrary
   expressions can be passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002470
Thomas Wouters477c8d52006-05-27 19:21:47 +00002471/* String ops must return a string. */
2472/* If the object is subclass of string, create a copy */
2473Py_LOCAL(PyStringObject *)
2474return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002476 if (PyString_CheckExact(self)) {
2477 Py_INCREF(self);
2478 return self;
2479 }
2480 return (PyStringObject *)PyString_FromStringAndSize(
2481 PyString_AS_STRING(self),
2482 PyString_GET_SIZE(self));
2483}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002484
Thomas Wouters477c8d52006-05-27 19:21:47 +00002485Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002486countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Thomas Wouters477c8d52006-05-27 19:21:47 +00002487{
2488 Py_ssize_t count=0;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002489 const char *start=target;
2490 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002491
Thomas Wouters477c8d52006-05-27 19:21:47 +00002492 while ( (start=findchar(start, end-start, c)) != NULL ) {
2493 count++;
2494 if (count >= maxcount)
2495 break;
2496 start += 1;
2497 }
2498 return count;
2499}
2500
2501Py_LOCAL(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002502findstring(const char *target, Py_ssize_t target_len,
2503 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002504 Py_ssize_t start,
2505 Py_ssize_t end,
2506 int direction)
2507{
2508 if (start < 0) {
2509 start += target_len;
2510 if (start < 0)
2511 start = 0;
2512 }
2513 if (end > target_len) {
2514 end = target_len;
2515 } else if (end < 0) {
2516 end += target_len;
2517 if (end < 0)
2518 end = 0;
2519 }
2520
2521 /* zero-length substrings always match at the first attempt */
2522 if (pattern_len == 0)
2523 return (direction > 0) ? start : end;
2524
2525 end -= pattern_len;
2526
2527 if (direction < 0) {
2528 for (; end >= start; end--)
2529 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2530 return end;
2531 } else {
2532 for (; start <= end; start++)
2533 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2534 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002535 }
2536 return -1;
2537}
2538
Thomas Wouters477c8d52006-05-27 19:21:47 +00002539Py_LOCAL_INLINE(Py_ssize_t)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002540countstring(const char *target, Py_ssize_t target_len,
2541 const char *pattern, Py_ssize_t pattern_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002542 Py_ssize_t start,
2543 Py_ssize_t end,
2544 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002545{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002546 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002547
Thomas Wouters477c8d52006-05-27 19:21:47 +00002548 if (start < 0) {
2549 start += target_len;
2550 if (start < 0)
2551 start = 0;
2552 }
2553 if (end > target_len) {
2554 end = target_len;
2555 } else if (end < 0) {
2556 end += target_len;
2557 if (end < 0)
2558 end = 0;
2559 }
2560
2561 /* zero-length substrings match everywhere */
2562 if (pattern_len == 0 || maxcount == 0) {
2563 if (target_len+1 < maxcount)
2564 return target_len+1;
2565 return maxcount;
2566 }
2567
2568 end -= pattern_len;
2569 if (direction < 0) {
2570 for (; (end >= start); end--)
2571 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2572 count++;
2573 if (--maxcount <= 0) break;
2574 end -= pattern_len-1;
2575 }
2576 } else {
2577 for (; (start <= end); start++)
2578 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2579 count++;
2580 if (--maxcount <= 0)
2581 break;
2582 start += pattern_len-1;
2583 }
2584 }
2585 return count;
2586}
2587
2588
2589/* Algorithms for different cases of string replacement */
2590
2591/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2592Py_LOCAL(PyStringObject *)
2593replace_interleave(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002594 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002595 Py_ssize_t maxcount)
2596{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002597 char *self_s, *result_s;
2598 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002599 Py_ssize_t count, i, product;
2600 PyStringObject *result;
2601
2602 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002603
Thomas Wouters477c8d52006-05-27 19:21:47 +00002604 /* 1 at the end plus 1 after every character */
2605 count = self_len+1;
2606 if (maxcount < count)
2607 count = maxcount;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002608
Thomas Wouters477c8d52006-05-27 19:21:47 +00002609 /* Check for overflow */
2610 /* result_len = count * to_len + self_len; */
2611 product = count * to_len;
2612 if (product / to_len != count) {
2613 PyErr_SetString(PyExc_OverflowError,
2614 "replace string is too long");
2615 return NULL;
2616 }
2617 result_len = product + self_len;
2618 if (result_len < 0) {
2619 PyErr_SetString(PyExc_OverflowError,
2620 "replace string is too long");
2621 return NULL;
2622 }
2623
2624 if (! (result = (PyStringObject *)
2625 PyString_FromStringAndSize(NULL, result_len)) )
2626 return NULL;
2627
2628 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002629 result_s = PyString_AS_STRING(result);
2630
2631 /* TODO: special case single character, which doesn't need memcpy */
2632
2633 /* Lay the first one down (guaranteed this will occur) */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002634 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002635 result_s += to_len;
2636 count -= 1;
2637
2638 for (i=0; i<count; i++) {
2639 *result_s++ = *self_s++;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002640 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002641 result_s += to_len;
2642 }
2643
2644 /* Copy the rest of the original string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002645 Py_MEMCPY(result_s, self_s, self_len-i);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002646
2647 return result;
2648}
2649
2650/* Special case for deleting a single character */
2651/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2652Py_LOCAL(PyStringObject *)
2653replace_delete_single_character(PyStringObject *self,
2654 char from_c, Py_ssize_t maxcount)
2655{
2656 char *self_s, *result_s;
2657 char *start, *next, *end;
2658 Py_ssize_t self_len, result_len;
2659 Py_ssize_t count;
2660 PyStringObject *result;
2661
2662 self_len = PyString_GET_SIZE(self);
2663 self_s = PyString_AS_STRING(self);
2664
2665 count = countchar(self_s, self_len, from_c, maxcount);
2666 if (count == 0) {
2667 return return_self(self);
2668 }
2669
2670 result_len = self_len - count; /* from_len == 1 */
2671 assert(result_len>=0);
2672
2673 if ( (result = (PyStringObject *)
2674 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2675 return NULL;
2676 result_s = PyString_AS_STRING(result);
2677
2678 start = self_s;
2679 end = self_s + self_len;
2680 while (count-- > 0) {
2681 next = findchar(start, end-start, from_c);
2682 if (next == NULL)
2683 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002684 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002685 result_s += (next-start);
2686 start = next+1;
2687 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002688 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002689
Thomas Wouters477c8d52006-05-27 19:21:47 +00002690 return result;
2691}
2692
2693/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2694
2695Py_LOCAL(PyStringObject *)
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002696replace_delete_substring(PyStringObject *self,
2697 const char *from_s, Py_ssize_t from_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002698 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002699 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002700 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002701 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002702 Py_ssize_t count, offset;
2703 PyStringObject *result;
2704
2705 self_len = PyString_GET_SIZE(self);
2706 self_s = PyString_AS_STRING(self);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002707
2708 count = countstring(self_s, self_len,
2709 from_s, from_len,
2710 0, self_len, 1,
2711 maxcount);
2712
2713 if (count == 0) {
2714 /* no matches */
2715 return return_self(self);
2716 }
2717
2718 result_len = self_len - (count * from_len);
2719 assert (result_len>=0);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002720
Thomas Wouters477c8d52006-05-27 19:21:47 +00002721 if ( (result = (PyStringObject *)
2722 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2723 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002724
Thomas Wouters477c8d52006-05-27 19:21:47 +00002725 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002726
Thomas Wouters477c8d52006-05-27 19:21:47 +00002727 start = self_s;
2728 end = self_s + self_len;
2729 while (count-- > 0) {
2730 offset = findstring(start, end-start,
2731 from_s, from_len,
2732 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002733 if (offset == -1)
2734 break;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002735 next = start + offset;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002736
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002737 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002738
Thomas Wouters477c8d52006-05-27 19:21:47 +00002739 result_s += (next-start);
2740 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002741 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002742 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002743 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002744}
2745
Thomas Wouters477c8d52006-05-27 19:21:47 +00002746/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2747Py_LOCAL(PyStringObject *)
2748replace_single_character_in_place(PyStringObject *self,
2749 char from_c, char to_c,
2750 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002751{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002752 char *self_s, *result_s, *start, *end, *next;
2753 Py_ssize_t self_len;
2754 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002755
Thomas Wouters477c8d52006-05-27 19:21:47 +00002756 /* The result string will be the same size */
2757 self_s = PyString_AS_STRING(self);
2758 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002759
Thomas Wouters477c8d52006-05-27 19:21:47 +00002760 next = findchar(self_s, self_len, from_c);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002761
Thomas Wouters477c8d52006-05-27 19:21:47 +00002762 if (next == NULL) {
2763 /* No matches; return the original string */
2764 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002765 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002766
Thomas Wouters477c8d52006-05-27 19:21:47 +00002767 /* Need to make a new string */
2768 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2769 if (result == NULL)
2770 return NULL;
2771 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002772 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002773
Thomas Wouters477c8d52006-05-27 19:21:47 +00002774 /* change everything in-place, starting with this one */
2775 start = result_s + (next-self_s);
2776 *start = to_c;
2777 start++;
2778 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002779
Thomas Wouters477c8d52006-05-27 19:21:47 +00002780 while (--maxcount > 0) {
2781 next = findchar(start, end-start, from_c);
2782 if (next == NULL)
2783 break;
2784 *next = to_c;
2785 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002786 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002787
Thomas Wouters477c8d52006-05-27 19:21:47 +00002788 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002789}
2790
Thomas Wouters477c8d52006-05-27 19:21:47 +00002791/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2792Py_LOCAL(PyStringObject *)
2793replace_substring_in_place(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002794 const char *from_s, Py_ssize_t from_len,
2795 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002796 Py_ssize_t maxcount)
2797{
2798 char *result_s, *start, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002799 char *self_s;
2800 Py_ssize_t self_len, offset;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002801 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002802
Thomas Wouters477c8d52006-05-27 19:21:47 +00002803 /* The result string will be the same size */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002804
Thomas Wouters477c8d52006-05-27 19:21:47 +00002805 self_s = PyString_AS_STRING(self);
2806 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002807
Thomas Wouters477c8d52006-05-27 19:21:47 +00002808 offset = findstring(self_s, self_len,
2809 from_s, from_len,
2810 0, self_len, FORWARD);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002811 if (offset == -1) {
2812 /* No matches; return the original string */
2813 return return_self(self);
2814 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002815
Thomas Wouters477c8d52006-05-27 19:21:47 +00002816 /* Need to make a new string */
2817 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2818 if (result == NULL)
2819 return NULL;
2820 result_s = PyString_AS_STRING(result);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002821 Py_MEMCPY(result_s, self_s, self_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002822
Thomas Wouters477c8d52006-05-27 19:21:47 +00002823 /* change everything in-place, starting with this one */
2824 start = result_s + offset;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002825 Py_MEMCPY(start, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002826 start += from_len;
2827 end = result_s + self_len;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002828
Thomas Wouters477c8d52006-05-27 19:21:47 +00002829 while ( --maxcount > 0) {
2830 offset = findstring(start, end-start,
2831 from_s, from_len,
2832 0, end-start, FORWARD);
2833 if (offset==-1)
2834 break;
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002835 Py_MEMCPY(start+offset, to_s, from_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002836 start += offset+from_len;
2837 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002838
Thomas Wouters477c8d52006-05-27 19:21:47 +00002839 return result;
2840}
2841
2842/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2843Py_LOCAL(PyStringObject *)
2844replace_single_character(PyStringObject *self,
2845 char from_c,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002846 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002847 Py_ssize_t maxcount)
2848{
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002849 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002850 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002851 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002852 Py_ssize_t count, product;
2853 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002854
Thomas Wouters477c8d52006-05-27 19:21:47 +00002855 self_s = PyString_AS_STRING(self);
2856 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002857
Thomas Wouters477c8d52006-05-27 19:21:47 +00002858 count = countchar(self_s, self_len, from_c, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002859 if (count == 0) {
2860 /* no matches, return unchanged */
2861 return return_self(self);
2862 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002863
Thomas Wouters477c8d52006-05-27 19:21:47 +00002864 /* use the difference between current and new, hence the "-1" */
2865 /* result_len = self_len + count * (to_len-1) */
2866 product = count * (to_len-1);
2867 if (product / (to_len-1) != count) {
2868 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2869 return NULL;
2870 }
2871 result_len = self_len + product;
2872 if (result_len < 0) {
2873 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2874 return NULL;
2875 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002876
Thomas Wouters477c8d52006-05-27 19:21:47 +00002877 if ( (result = (PyStringObject *)
2878 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2879 return NULL;
2880 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002881
Thomas Wouters477c8d52006-05-27 19:21:47 +00002882 start = self_s;
2883 end = self_s + self_len;
2884 while (count-- > 0) {
2885 next = findchar(start, end-start, from_c);
2886 if (next == NULL)
2887 break;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002888
Thomas Wouters477c8d52006-05-27 19:21:47 +00002889 if (next == start) {
2890 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002891 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002892 result_s += to_len;
2893 start += 1;
2894 } else {
2895 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002896 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002897 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002898 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002899 result_s += to_len;
2900 start = next+1;
2901 }
2902 }
2903 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002904 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002905
Thomas Wouters477c8d52006-05-27 19:21:47 +00002906 return result;
2907}
2908
2909/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2910Py_LOCAL(PyStringObject *)
2911replace_substring(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002912 const char *from_s, Py_ssize_t from_len,
2913 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002914 Py_ssize_t maxcount) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002915 char *self_s, *result_s;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002916 char *start, *next, *end;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002917 Py_ssize_t self_len, result_len;
Thomas Wouters477c8d52006-05-27 19:21:47 +00002918 Py_ssize_t count, offset, product;
2919 PyStringObject *result;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002920
Thomas Wouters477c8d52006-05-27 19:21:47 +00002921 self_s = PyString_AS_STRING(self);
2922 self_len = PyString_GET_SIZE(self);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002923
Thomas Wouters477c8d52006-05-27 19:21:47 +00002924 count = countstring(self_s, self_len,
2925 from_s, from_len,
2926 0, self_len, FORWARD, maxcount);
2927 if (count == 0) {
2928 /* no matches, return unchanged */
2929 return return_self(self);
2930 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002931
Thomas Wouters477c8d52006-05-27 19:21:47 +00002932 /* Check for overflow */
2933 /* result_len = self_len + count * (to_len-from_len) */
2934 product = count * (to_len-from_len);
2935 if (product / (to_len-from_len) != count) {
2936 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2937 return NULL;
2938 }
2939 result_len = self_len + product;
2940 if (result_len < 0) {
2941 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2942 return NULL;
2943 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002944
Thomas Wouters477c8d52006-05-27 19:21:47 +00002945 if ( (result = (PyStringObject *)
2946 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2947 return NULL;
2948 result_s = PyString_AS_STRING(result);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002949
Thomas Wouters477c8d52006-05-27 19:21:47 +00002950 start = self_s;
2951 end = self_s + self_len;
2952 while (count-- > 0) {
2953 offset = findstring(start, end-start,
2954 from_s, from_len,
2955 0, end-start, FORWARD);
2956 if (offset == -1)
2957 break;
2958 next = start+offset;
2959 if (next == start) {
2960 /* replace with the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002961 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002962 result_s += to_len;
2963 start += from_len;
2964 } else {
2965 /* copy the unchanged old then the 'to' */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002966 Py_MEMCPY(result_s, start, next-start);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002967 result_s += (next-start);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002968 Py_MEMCPY(result_s, to_s, to_len);
Thomas Wouters477c8d52006-05-27 19:21:47 +00002969 result_s += to_len;
2970 start = next+from_len;
2971 }
2972 }
2973 /* Copy the remainder of the remaining string */
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00002974 Py_MEMCPY(result_s, start, end-start);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002975
Thomas Wouters477c8d52006-05-27 19:21:47 +00002976 return result;
2977}
2978
2979
2980Py_LOCAL(PyStringObject *)
2981replace(PyStringObject *self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002982 const char *from_s, Py_ssize_t from_len,
2983 const char *to_s, Py_ssize_t to_len,
Thomas Wouters477c8d52006-05-27 19:21:47 +00002984 Py_ssize_t maxcount)
2985{
Thomas Wouters477c8d52006-05-27 19:21:47 +00002986 if (maxcount < 0) {
2987 maxcount = PY_SSIZE_T_MAX;
2988 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2989 /* nothing to do; return the original string */
2990 return return_self(self);
2991 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00002992
Thomas Wouters477c8d52006-05-27 19:21:47 +00002993 if (maxcount == 0 ||
2994 (from_len == 0 && to_len == 0)) {
2995 /* nothing to do; return the original string */
2996 return return_self(self);
2997 }
2998
2999 /* Handle zero-length special cases */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003000
Thomas Wouters477c8d52006-05-27 19:21:47 +00003001 if (from_len == 0) {
3002 /* insert the 'to' string everywhere. */
3003 /* >>> "Python".replace("", ".") */
3004 /* '.P.y.t.h.o.n.' */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003005 return replace_interleave(self, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003006 }
3007
3008 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3009 /* point for an empty self string to generate a non-empty string */
3010 /* Special case so the remaining code always gets a non-empty string */
3011 if (PyString_GET_SIZE(self) == 0) {
3012 return return_self(self);
3013 }
3014
3015 if (to_len == 0) {
3016 /* delete all occurances of 'from' string */
3017 if (from_len == 1) {
3018 return replace_delete_single_character(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003019 self, from_s[0], maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003020 } else {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003021 return replace_delete_substring(self, from_s, from_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003022 }
3023 }
3024
3025 /* Handle special case where both strings have the same length */
3026
3027 if (from_len == to_len) {
3028 if (from_len == 1) {
3029 return replace_single_character_in_place(
3030 self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003031 from_s[0],
3032 to_s[0],
Thomas Wouters477c8d52006-05-27 19:21:47 +00003033 maxcount);
3034 } else {
3035 return replace_substring_in_place(
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003036 self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003037 }
3038 }
3039
3040 /* Otherwise use the more generic algorithms */
3041 if (from_len == 1) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003042 return replace_single_character(self, from_s[0],
3043 to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003044 } else {
3045 /* len('from')>=2, len('to')>=1 */
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003046 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Thomas Wouters477c8d52006-05-27 19:21:47 +00003047 }
3048}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003049
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003050PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003051"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003052\n\
3053Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003054old replaced by new. If the optional argument count is\n\
3055given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003056
3057static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003058string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003059{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003060 Py_ssize_t count = -1;
Thomas Wouters477c8d52006-05-27 19:21:47 +00003061 PyObject *from, *to;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003062 const char *from_s, *to_s;
3063 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003064
Thomas Wouters477c8d52006-05-27 19:21:47 +00003065 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003066 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003067
Thomas Wouters477c8d52006-05-27 19:21:47 +00003068 if (PyString_Check(from)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003069 from_s = PyString_AS_STRING(from);
3070 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003071 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003072#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00003073 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003074 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003075 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003076#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003077 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003078 return NULL;
3079
Thomas Wouters477c8d52006-05-27 19:21:47 +00003080 if (PyString_Check(to)) {
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003081 to_s = PyString_AS_STRING(to);
3082 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003083 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003084#ifdef Py_USING_UNICODE
Thomas Wouters477c8d52006-05-27 19:21:47 +00003085 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003086 return PyUnicode_Replace((PyObject *)self,
Thomas Wouters477c8d52006-05-27 19:21:47 +00003087 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003088#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003089 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003090 return NULL;
3091
Thomas Wouters477c8d52006-05-27 19:21:47 +00003092 return (PyObject *)replace((PyStringObject *) self,
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003093 from_s, from_len,
3094 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003095}
3096
Thomas Wouters477c8d52006-05-27 19:21:47 +00003097/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003098
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003099/* Matches the end (direction >= 0) or start (direction < 0) of self
3100 * against substr, using the start and end arguments. Returns
3101 * -1 on error, 0 if not found and 1 if found.
3102 */
3103Py_LOCAL(int)
3104_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3105 Py_ssize_t end, int direction)
3106{
3107 Py_ssize_t len = PyString_GET_SIZE(self);
3108 Py_ssize_t slen;
3109 const char* sub;
3110 const char* str;
3111
3112 if (PyString_Check(substr)) {
3113 sub = PyString_AS_STRING(substr);
3114 slen = PyString_GET_SIZE(substr);
3115 }
3116#ifdef Py_USING_UNICODE
3117 else if (PyUnicode_Check(substr))
3118 return PyUnicode_Tailmatch((PyObject *)self,
3119 substr, start, end, direction);
3120#endif
3121 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3122 return -1;
3123 str = PyString_AS_STRING(self);
3124
3125 string_adjust_indices(&start, &end, len);
3126
3127 if (direction < 0) {
3128 /* startswith */
3129 if (start+slen > len)
3130 return 0;
3131 } else {
3132 /* endswith */
3133 if (end-start < slen || start > len)
3134 return 0;
3135
3136 if (end-slen > start)
3137 start = end - slen;
3138 }
3139 if (end-start >= slen)
3140 return ! memcmp(str+start, sub, slen);
3141 return 0;
3142}
3143
3144
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003145PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003146"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003147\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003148Return True if S starts with the specified prefix, False otherwise.\n\
3149With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003150With optional end, stop comparing S at that position.\n\
3151prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003152
3153static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003154string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003155{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003156 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003157 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003158 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003159 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003160
Guido van Rossumc6821402000-05-08 14:08:05 +00003161 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3162 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003163 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003164 if (PyTuple_Check(subobj)) {
3165 Py_ssize_t i;
3166 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3167 result = _string_tailmatch(self,
3168 PyTuple_GET_ITEM(subobj, i),
3169 start, end, -1);
3170 if (result == -1)
3171 return NULL;
3172 else if (result) {
3173 Py_RETURN_TRUE;
3174 }
3175 }
3176 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003177 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003178 result = _string_tailmatch(self, subobj, start, end, -1);
3179 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003180 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003181 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003182 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003183}
3184
3185
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003186PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003187"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003188\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003189Return True if S ends with the specified suffix, False otherwise.\n\
3190With optional start, test S beginning at that position.\n\
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003191With optional end, stop comparing S at that position.\n\
3192suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003193
3194static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003195string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003196{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003197 Py_ssize_t start = 0;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003198 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003199 PyObject *subobj;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003200 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003201
Guido van Rossumc6821402000-05-08 14:08:05 +00003202 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3203 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003204 return NULL;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003205 if (PyTuple_Check(subobj)) {
3206 Py_ssize_t i;
3207 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3208 result = _string_tailmatch(self,
3209 PyTuple_GET_ITEM(subobj, i),
3210 start, end, +1);
3211 if (result == -1)
3212 return NULL;
3213 else if (result) {
3214 Py_RETURN_TRUE;
3215 }
3216 }
3217 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003218 }
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003219 result = _string_tailmatch(self, subobj, start, end, +1);
3220 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003221 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003222 else
Thomas Wouters0e3f5912006-08-11 14:57:12 +00003223 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003224}
3225
3226
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003227PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003228"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003229\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003230Encodes S using the codec registered for encoding. encoding defaults\n\
3231to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003232handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003233a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3234'xmlcharrefreplace' as well as any other name registered with\n\
3235codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003236
3237static PyObject *
3238string_encode(PyStringObject *self, PyObject *args)
3239{
3240 char *encoding = NULL;
3241 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003242 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003243
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003244 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3245 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003246 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003247 if (v == NULL)
3248 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003249 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3250 PyErr_Format(PyExc_TypeError,
3251 "encoder did not return a string/unicode object "
3252 "(type=%.400s)",
3253 v->ob_type->tp_name);
3254 Py_DECREF(v);
3255 return NULL;
3256 }
3257 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003258
3259 onError:
3260 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003261}
3262
3263
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003264PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003265"S.decode([encoding[,errors]]) -> object\n\
3266\n\
3267Decodes S using the codec registered for encoding. encoding defaults\n\
3268to the default encoding. errors may be given to set a different error\n\
3269handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003270a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3271as well as any other name registerd with codecs.register_error that is\n\
3272able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003273
3274static PyObject *
3275string_decode(PyStringObject *self, PyObject *args)
3276{
3277 char *encoding = NULL;
3278 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003279 PyObject *v;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003280
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003281 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3282 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003283 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003284 if (v == NULL)
3285 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003286 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3287 PyErr_Format(PyExc_TypeError,
3288 "decoder did not return a string/unicode object "
3289 "(type=%.400s)",
3290 v->ob_type->tp_name);
3291 Py_DECREF(v);
3292 return NULL;
3293 }
3294 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003295
3296 onError:
3297 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003298}
3299
3300
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003301PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003302"S.expandtabs([tabsize]) -> string\n\
3303\n\
3304Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003305If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003306
3307static PyObject*
3308string_expandtabs(PyStringObject *self, PyObject *args)
3309{
3310 const char *e, *p;
3311 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003312 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003313 PyObject *u;
3314 int tabsize = 8;
3315
3316 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3317 return NULL;
3318
Thomas Wouters7e474022000-07-16 12:04:32 +00003319 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00003320 i = j = 0;
3321 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3322 for (p = PyString_AS_STRING(self); p < e; p++)
3323 if (*p == '\t') {
3324 if (tabsize > 0)
3325 j += tabsize - (j % tabsize);
3326 }
3327 else {
3328 j++;
3329 if (*p == '\n' || *p == '\r') {
3330 i += j;
3331 j = 0;
3332 }
3333 }
3334
3335 /* Second pass: create output string and fill it */
3336 u = PyString_FromStringAndSize(NULL, i + j);
3337 if (!u)
3338 return NULL;
3339
3340 j = 0;
3341 q = PyString_AS_STRING(u);
3342
3343 for (p = PyString_AS_STRING(self); p < e; p++)
3344 if (*p == '\t') {
3345 if (tabsize > 0) {
3346 i = tabsize - (j % tabsize);
3347 j += i;
3348 while (i--)
3349 *q++ = ' ';
3350 }
3351 }
3352 else {
3353 j++;
3354 *q++ = *p;
3355 if (*p == '\n' || *p == '\r')
3356 j = 0;
3357 }
3358
3359 return u;
3360}
3361
Thomas Wouters477c8d52006-05-27 19:21:47 +00003362Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003363pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003364{
3365 PyObject *u;
3366
3367 if (left < 0)
3368 left = 0;
3369 if (right < 0)
3370 right = 0;
3371
Tim Peters8fa5dd02001-09-12 02:18:30 +00003372 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003373 Py_INCREF(self);
3374 return (PyObject *)self;
3375 }
3376
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003377 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003378 left + PyString_GET_SIZE(self) + right);
3379 if (u) {
3380 if (left)
3381 memset(PyString_AS_STRING(u), fill, left);
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003382 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003383 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003384 PyString_GET_SIZE(self));
3385 if (right)
3386 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3387 fill, right);
3388 }
3389
3390 return u;
3391}
3392
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003393PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003394"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003395"\n"
3396"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003397"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003398
3399static PyObject *
3400string_ljust(PyStringObject *self, PyObject *args)
3401{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003402 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003403 char fillchar = ' ';
3404
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003405 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406 return NULL;
3407
Tim Peters8fa5dd02001-09-12 02:18:30 +00003408 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003409 Py_INCREF(self);
3410 return (PyObject*) self;
3411 }
3412
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003413 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414}
3415
3416
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003417PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003418"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003419"\n"
3420"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003421"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003422
3423static PyObject *
3424string_rjust(PyStringObject *self, PyObject *args)
3425{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003426 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003427 char fillchar = ' ';
3428
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003429 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003430 return NULL;
3431
Tim Peters8fa5dd02001-09-12 02:18:30 +00003432 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003433 Py_INCREF(self);
3434 return (PyObject*) self;
3435 }
3436
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003437 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003438}
3439
3440
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003441PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003442"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003443"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003444"Return S centered in a string of length width. Padding is\n"
3445"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003446
3447static PyObject *
3448string_center(PyStringObject *self, PyObject *args)
3449{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003450 Py_ssize_t marg, left;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003451 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003452 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003453
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003454 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003455 return NULL;
3456
Tim Peters8fa5dd02001-09-12 02:18:30 +00003457 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003458 Py_INCREF(self);
3459 return (PyObject*) self;
3460 }
3461
3462 marg = width - PyString_GET_SIZE(self);
3463 left = marg / 2 + (marg & width & 1);
3464
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003465 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003466}
3467
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003468PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003469"S.zfill(width) -> string\n"
3470"\n"
3471"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003472"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003473
3474static PyObject *
3475string_zfill(PyStringObject *self, PyObject *args)
3476{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003477 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003478 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003479 char *p;
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003480 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003481
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00003482 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003483 return NULL;
3484
3485 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003486 if (PyString_CheckExact(self)) {
3487 Py_INCREF(self);
3488 return (PyObject*) self;
3489 }
3490 else
3491 return PyString_FromStringAndSize(
3492 PyString_AS_STRING(self),
3493 PyString_GET_SIZE(self)
3494 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003495 }
3496
3497 fill = width - PyString_GET_SIZE(self);
3498
3499 s = pad(self, fill, 0, '0');
3500
3501 if (s == NULL)
3502 return NULL;
3503
3504 p = PyString_AS_STRING(s);
3505 if (p[fill] == '+' || p[fill] == '-') {
3506 /* move sign to beginning of string */
3507 p[0] = p[fill];
3508 p[fill] = '0';
3509 }
3510
3511 return (PyObject*) s;
3512}
3513
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003514PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003515"S.isspace() -> bool\n\
3516\n\
3517Return True if all characters in S are whitespace\n\
3518and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003519
3520static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003521string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003522{
Fred Drakeba096332000-07-09 07:04:36 +00003523 register const unsigned char *p
3524 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003525 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003526
Guido van Rossum4c08d552000-03-10 22:55:18 +00003527 /* Shortcut for single character strings */
3528 if (PyString_GET_SIZE(self) == 1 &&
3529 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003530 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003531
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003532 /* Special case for empty strings */
3533 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003534 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003535
Guido van Rossum4c08d552000-03-10 22:55:18 +00003536 e = p + PyString_GET_SIZE(self);
3537 for (; p < e; p++) {
3538 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003539 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003540 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003541 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003542}
3543
3544
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003545PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003546"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003547\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003548Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003549and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003550
3551static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003552string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003553{
Fred Drakeba096332000-07-09 07:04:36 +00003554 register const unsigned char *p
3555 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003556 register const unsigned char *e;
3557
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003558 /* Shortcut for single character strings */
3559 if (PyString_GET_SIZE(self) == 1 &&
3560 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003561 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003562
3563 /* Special case for empty strings */
3564 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003565 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003566
3567 e = p + PyString_GET_SIZE(self);
3568 for (; p < e; p++) {
3569 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003570 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003571 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003572 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003573}
3574
3575
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003576PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003577"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003578\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003579Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003580and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003581
3582static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003583string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003584{
Fred Drakeba096332000-07-09 07:04:36 +00003585 register const unsigned char *p
3586 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003587 register const unsigned char *e;
3588
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003589 /* Shortcut for single character strings */
3590 if (PyString_GET_SIZE(self) == 1 &&
3591 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003592 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003593
3594 /* Special case for empty strings */
3595 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003596 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003597
3598 e = p + PyString_GET_SIZE(self);
3599 for (; p < e; p++) {
3600 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003601 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003602 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003603 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003604}
3605
3606
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003607PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003608"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003609\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003610Return True if all characters in S are digits\n\
3611and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003612
3613static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003614string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003615{
Fred Drakeba096332000-07-09 07:04:36 +00003616 register const unsigned char *p
3617 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003618 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003619
Guido van Rossum4c08d552000-03-10 22:55:18 +00003620 /* Shortcut for single character strings */
3621 if (PyString_GET_SIZE(self) == 1 &&
3622 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003623 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003624
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003625 /* Special case for empty strings */
3626 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003627 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003628
Guido van Rossum4c08d552000-03-10 22:55:18 +00003629 e = p + PyString_GET_SIZE(self);
3630 for (; p < e; p++) {
3631 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003632 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003633 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003634 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003635}
3636
3637
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003638PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003639"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003640\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003641Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003642at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003643
3644static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003645string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003646{
Fred Drakeba096332000-07-09 07:04:36 +00003647 register const unsigned char *p
3648 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003649 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003650 int cased;
3651
Guido van Rossum4c08d552000-03-10 22:55:18 +00003652 /* Shortcut for single character strings */
3653 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003654 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003655
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003656 /* Special case for empty strings */
3657 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003658 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003659
Guido van Rossum4c08d552000-03-10 22:55:18 +00003660 e = p + PyString_GET_SIZE(self);
3661 cased = 0;
3662 for (; p < e; p++) {
3663 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003664 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003665 else if (!cased && islower(*p))
3666 cased = 1;
3667 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003668 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003669}
3670
3671
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003672PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003673"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003674\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003675Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003676at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003677
3678static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003679string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003680{
Fred Drakeba096332000-07-09 07:04:36 +00003681 register const unsigned char *p
3682 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003683 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003684 int cased;
3685
Guido van Rossum4c08d552000-03-10 22:55:18 +00003686 /* Shortcut for single character strings */
3687 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003688 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003689
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003690 /* Special case for empty strings */
3691 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003692 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003693
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694 e = p + PyString_GET_SIZE(self);
3695 cased = 0;
3696 for (; p < e; p++) {
3697 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003698 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699 else if (!cased && isupper(*p))
3700 cased = 1;
3701 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003702 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703}
3704
3705
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003706PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003707"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003709Return True if S is a titlecased string and there is at least one\n\
3710character in S, i.e. uppercase characters may only follow uncased\n\
3711characters and lowercase characters only cased ones. Return False\n\
3712otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003713
3714static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003715string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003716{
Fred Drakeba096332000-07-09 07:04:36 +00003717 register const unsigned char *p
3718 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003719 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003720 int cased, previous_is_cased;
3721
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722 /* Shortcut for single character strings */
3723 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003724 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003726 /* Special case for empty strings */
3727 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003728 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003729
Guido van Rossum4c08d552000-03-10 22:55:18 +00003730 e = p + PyString_GET_SIZE(self);
3731 cased = 0;
3732 previous_is_cased = 0;
3733 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003734 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003735
3736 if (isupper(ch)) {
3737 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003738 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739 previous_is_cased = 1;
3740 cased = 1;
3741 }
3742 else if (islower(ch)) {
3743 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003744 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003745 previous_is_cased = 1;
3746 cased = 1;
3747 }
3748 else
3749 previous_is_cased = 0;
3750 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003751 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003752}
3753
3754
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003755PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003756"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003757\n\
3758Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003759Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003760is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003761
Guido van Rossum4c08d552000-03-10 22:55:18 +00003762static PyObject*
3763string_splitlines(PyStringObject *self, PyObject *args)
3764{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003765 register Py_ssize_t i;
3766 register Py_ssize_t j;
3767 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003768 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003769 PyObject *list;
3770 PyObject *str;
3771 char *data;
3772
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003773 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003774 return NULL;
3775
3776 data = PyString_AS_STRING(self);
3777 len = PyString_GET_SIZE(self);
3778
Thomas Wouters477c8d52006-05-27 19:21:47 +00003779 /* This does not use the preallocated list because splitlines is
3780 usually run with hundreds of newlines. The overhead of
3781 switching between PyList_SET_ITEM and append causes about a
3782 2-3% slowdown for that common case. A smarter implementation
3783 could move the if check out, so the SET_ITEMs are done first
3784 and the appends only done when the prealloc buffer is full.
3785 That's too much work for little gain.*/
3786
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787 list = PyList_New(0);
3788 if (!list)
3789 goto onError;
3790
3791 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003792 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003793
Guido van Rossum4c08d552000-03-10 22:55:18 +00003794 /* Find a line and append it */
3795 while (i < len && data[i] != '\n' && data[i] != '\r')
3796 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003797
3798 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003799 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003800 if (i < len) {
3801 if (data[i] == '\r' && i + 1 < len &&
3802 data[i+1] == '\n')
3803 i += 2;
3804 else
3805 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003806 if (keepends)
3807 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003808 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003809 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003810 j = i;
3811 }
3812 if (j < len) {
3813 SPLIT_APPEND(data, j, len);
3814 }
3815
3816 return list;
3817
3818 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003819 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003820 return NULL;
3821}
3822
3823#undef SPLIT_APPEND
Thomas Wouters477c8d52006-05-27 19:21:47 +00003824#undef SPLIT_ADD
3825#undef MAX_PREALLOC
3826#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003827
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003828static PyObject *
3829string_getnewargs(PyStringObject *v)
3830{
3831 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3832}
3833
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003834
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003835static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003836string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003837 /* Counterparts of the obsolete stropmodule functions; except
3838 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003839 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3840 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003841 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003842 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3843 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003844 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3845 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3846 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3847 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3848 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3849 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3850 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003851 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3852 capitalize__doc__},
3853 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3854 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3855 endswith__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003856 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003857 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3858 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3859 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3860 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3861 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3862 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3863 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Thomas Wouters477c8d52006-05-27 19:21:47 +00003864 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3865 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003866 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3867 startswith__doc__},
3868 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3869 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3870 swapcase__doc__},
3871 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3872 translate__doc__},
3873 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3874 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3875 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3876 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3877 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3878 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3879 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3880 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3881 expandtabs__doc__},
3882 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3883 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003884 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003885 {NULL, NULL} /* sentinel */
3886};
3887
Jeremy Hylton938ace62002-07-17 16:30:39 +00003888static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003889str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3890
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003891static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003892string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003893{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003894 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003895 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003896
Guido van Rossumae960af2001-08-30 03:11:59 +00003897 if (type != &PyString_Type)
3898 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003899 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3900 return NULL;
3901 if (x == NULL)
3902 return PyString_FromString("");
3903 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003904}
3905
Guido van Rossumae960af2001-08-30 03:11:59 +00003906static PyObject *
3907str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3908{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003909 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003910 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003911
3912 assert(PyType_IsSubtype(type, &PyString_Type));
3913 tmp = string_new(&PyString_Type, args, kwds);
3914 if (tmp == NULL)
3915 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003916 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003917 n = PyString_GET_SIZE(tmp);
3918 pnew = type->tp_alloc(type, n);
3919 if (pnew != NULL) {
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00003920 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003921 ((PyStringObject *)pnew)->ob_shash =
3922 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003923 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003924 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003925 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003926 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003927}
3928
Guido van Rossumcacfc072002-05-24 19:01:59 +00003929static PyObject *
3930basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3931{
3932 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003933 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003934 return NULL;
3935}
3936
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003937static PyObject *
3938string_mod(PyObject *v, PyObject *w)
3939{
3940 if (!PyString_Check(v)) {
3941 Py_INCREF(Py_NotImplemented);
3942 return Py_NotImplemented;
3943 }
3944 return PyString_Format(v, w);
3945}
3946
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003947PyDoc_STRVAR(basestring_doc,
3948"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003949
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003950static PyNumberMethods string_as_number = {
3951 0, /*nb_add*/
3952 0, /*nb_subtract*/
3953 0, /*nb_multiply*/
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003954 string_mod, /*nb_remainder*/
3955};
3956
3957
Guido van Rossumcacfc072002-05-24 19:01:59 +00003958PyTypeObject PyBaseString_Type = {
3959 PyObject_HEAD_INIT(&PyType_Type)
3960 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003961 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003962 0,
3963 0,
3964 0, /* tp_dealloc */
3965 0, /* tp_print */
3966 0, /* tp_getattr */
3967 0, /* tp_setattr */
3968 0, /* tp_compare */
3969 0, /* tp_repr */
3970 0, /* tp_as_number */
3971 0, /* tp_as_sequence */
3972 0, /* tp_as_mapping */
3973 0, /* tp_hash */
3974 0, /* tp_call */
3975 0, /* tp_str */
3976 0, /* tp_getattro */
3977 0, /* tp_setattro */
3978 0, /* tp_as_buffer */
3979 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3980 basestring_doc, /* tp_doc */
3981 0, /* tp_traverse */
3982 0, /* tp_clear */
3983 0, /* tp_richcompare */
3984 0, /* tp_weaklistoffset */
3985 0, /* tp_iter */
3986 0, /* tp_iternext */
3987 0, /* tp_methods */
3988 0, /* tp_members */
3989 0, /* tp_getset */
3990 &PyBaseObject_Type, /* tp_base */
3991 0, /* tp_dict */
3992 0, /* tp_descr_get */
3993 0, /* tp_descr_set */
3994 0, /* tp_dictoffset */
3995 0, /* tp_init */
3996 0, /* tp_alloc */
3997 basestring_new, /* tp_new */
3998 0, /* tp_free */
3999};
4000
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004001PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004002"str(object) -> string\n\
4003\n\
4004Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004005If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004006
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004007static PyObject *str_iter(PyObject *seq);
4008
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004009PyTypeObject PyString_Type = {
4010 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004011 0,
Guido van Rossum84fc66d2007-05-03 17:18:26 +00004012 "str8",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004013 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004014 sizeof(char),
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004015 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004016 (printfunc)string_print, /* tp_print */
4017 0, /* tp_getattr */
4018 0, /* tp_setattr */
4019 0, /* tp_compare */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004020 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004021 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004022 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004023 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004024 (hashfunc)string_hash, /* tp_hash */
4025 0, /* tp_call */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004026 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004027 PyObject_GenericGetAttr, /* tp_getattro */
4028 0, /* tp_setattro */
4029 &string_as_buffer, /* tp_as_buffer */
Thomas Wouters27d517b2007-02-25 20:39:11 +00004030 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
4031 Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004032 string_doc, /* tp_doc */
4033 0, /* tp_traverse */
4034 0, /* tp_clear */
4035 (richcmpfunc)string_richcompare, /* tp_richcompare */
4036 0, /* tp_weaklistoffset */
Guido van Rossum50e9fb92006-08-17 05:42:55 +00004037 str_iter, /* tp_iter */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004038 0, /* tp_iternext */
4039 string_methods, /* tp_methods */
4040 0, /* tp_members */
4041 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004042 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004043 0, /* tp_dict */
4044 0, /* tp_descr_get */
4045 0, /* tp_descr_set */
4046 0, /* tp_dictoffset */
4047 0, /* tp_init */
4048 0, /* tp_alloc */
4049 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004050 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004051};
4052
4053void
Fred Drakeba096332000-07-09 07:04:36 +00004054PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004055{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004056 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004057 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004058 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004059 if (w == NULL || !PyString_Check(*pv)) {
4060 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004061 *pv = NULL;
4062 return;
4063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004064 v = string_concat((PyStringObject *) *pv, w);
4065 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004066 *pv = v;
4067}
4068
Guido van Rossum013142a1994-08-30 08:19:36 +00004069void
Fred Drakeba096332000-07-09 07:04:36 +00004070PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004071{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004072 PyString_Concat(pv, w);
4073 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004074}
4075
4076
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004077/* The following function breaks the notion that strings are immutable:
4078 it changes the size of a string. We get away with this only if there
4079 is only one module referencing the object. You can also think of it
4080 as creating a new string object and destroying the old one, only
4081 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004082 already be known to some other part of the code...
4083 Note that if there's not enough memory to resize the string, the original
4084 string object at *pv is deallocated, *pv is set to NULL, an "out of
4085 memory" exception is set, and -1 is returned. Else (on success) 0 is
4086 returned, and the value in *pv may or may not be the same as on input.
4087 As always, an extra byte is allocated for a trailing \0 byte (newsize
4088 does *not* include that), and a trailing \0 byte is stored.
4089*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004090
4091int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004092_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004093{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004094 register PyObject *v;
4095 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004096 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00004097 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
4098 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004099 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004100 Py_DECREF(v);
4101 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004102 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004103 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004104 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004105 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004106 _Py_ForgetReference(v);
4107 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004108 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004109 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004110 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004111 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004112 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004113 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004114 _Py_NewReference(*pv);
4115 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004116 sv->ob_size = newsize;
4117 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004118 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004119 return 0;
4120}
Guido van Rossume5372401993-03-16 12:15:04 +00004121
4122/* Helpers for formatstring */
4123
Thomas Wouters477c8d52006-05-27 19:21:47 +00004124Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004125getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004126{
Thomas Wouters977485d2006-02-16 15:59:12 +00004127 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004128 if (argidx < arglen) {
4129 (*p_argidx)++;
4130 if (arglen < 0)
4131 return args;
4132 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004133 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004134 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004135 PyErr_SetString(PyExc_TypeError,
4136 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004137 return NULL;
4138}
4139
/* Flag bits recorded while parsing a format specifier.  Each bit
 * corresponds to one flag character:
 *
 *	F_LJUST		'-'
 *	F_SIGN		'+'
 *	F_BLANK		' '
 *	F_ALT		'#'
 *	F_ZERO		'0'
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10

4152
Thomas Wouters477c8d52006-05-27 19:21:47 +00004153Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004154formatfloat(char *buf, size_t buflen, int flags,
4155 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004156{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004157 /* fmt = '%#.' + `prec` + `type`
4158 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004159 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004160 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004161 x = PyFloat_AsDouble(v);
4162 if (x == -1.0 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004163 PyErr_Format(PyExc_TypeError, "float argument required, "
4164 "not %.200s", v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004165 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004166 }
Guido van Rossume5372401993-03-16 12:15:04 +00004167 if (prec < 0)
4168 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004169 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4170 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004171 /* Worst case length calc to ensure no buffer overrun:
4172
4173 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004174 fmt = %#.<prec>g
4175 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004176 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004177 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004178
4179 'f' formats:
4180 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4181 len = 1 + 50 + 1 + prec = 52 + prec
4182
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004183 If prec=0 the effective precision is 1 (the leading digit is
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00004184 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004185
4186 */
4187 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4188 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004189 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004190 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004191 return -1;
4192 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004193 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4194 (flags&F_ALT) ? "#" : "",
4195 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004196 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004197 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004198}
4199
Tim Peters38fd5b62000-09-21 05:43:11 +00004200/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4201 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4202 * Python's regular ints.
4203 * Return value: a new PyString*, or NULL if error.
4204 * . *pbuf is set to point into it,
4205 * *plen set to the # of chars following that.
4206 * Caller must decref it when done using pbuf.
4207 * The string starting at *pbuf is of the form
4208 * "-"? ("0x" | "0X")? digit+
4209 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004210 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004211 * There will be at least prec digits, zero-filled on the left if
4212 * necessary to get that many.
4213 * val object to be converted
4214 * flags bitmask of format flags; only F_ALT is looked at
4215 * prec minimum number of digits; 0-fill on left if needed
4216 * type a character in [duoxX]; u acts the same as d
4217 *
4218 * CAUTION: o, x and X conversions on regular ints can never
4219 * produce a '-' sign, but can for Python's unbounded ints.
4220 */
4221PyObject*
4222_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4223 char **pbuf, int *plen)
4224{
4225 PyObject *result = NULL;
4226 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004227 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004228 int sign; /* 1 if '-', else 0 */
4229 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004230 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004231 int numdigits; /* len == numnondigits + numdigits */
4232 int numnondigits = 0;
4233
Guido van Rossumddefaf32007-01-14 03:31:43 +00004234 /* Avoid exceeding SSIZE_T_MAX */
4235 if (prec > PY_SSIZE_T_MAX-3) {
4236 PyErr_SetString(PyExc_OverflowError,
4237 "precision too large");
4238 return NULL;
4239 }
4240
4241
Tim Peters38fd5b62000-09-21 05:43:11 +00004242 switch (type) {
4243 case 'd':
4244 case 'u':
4245 result = val->ob_type->tp_str(val);
4246 break;
4247 case 'o':
4248 result = val->ob_type->tp_as_number->nb_oct(val);
4249 break;
4250 case 'x':
4251 case 'X':
4252 numnondigits = 2;
4253 result = val->ob_type->tp_as_number->nb_hex(val);
4254 break;
4255 default:
4256 assert(!"'type' not in [duoxX]");
4257 }
4258 if (!result)
4259 return NULL;
4260
Thomas Wouters00ee7ba2006-08-21 19:07:27 +00004261 buf = PyString_AsString(result);
4262 if (!buf) {
4263 Py_DECREF(result);
4264 return NULL;
4265 }
4266
Tim Peters38fd5b62000-09-21 05:43:11 +00004267 /* To modify the string in-place, there can only be one reference. */
4268 if (result->ob_refcnt != 1) {
4269 PyErr_BadInternalCall();
4270 return NULL;
4271 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004272 llen = PyString_Size(result);
Thomas Wouters89f507f2006-12-13 04:49:30 +00004273 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004274 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4275 return NULL;
4276 }
4277 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004278 if (buf[len-1] == 'L') {
4279 --len;
4280 buf[len] = '\0';
4281 }
4282 sign = buf[0] == '-';
4283 numnondigits += sign;
4284 numdigits = len - numnondigits;
4285 assert(numdigits > 0);
4286
Tim Petersfff53252001-04-12 18:38:48 +00004287 /* Get rid of base marker unless F_ALT */
4288 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004289 /* Need to skip 0x, 0X or 0. */
4290 int skipped = 0;
4291 switch (type) {
4292 case 'o':
4293 assert(buf[sign] == '0');
4294 /* If 0 is only digit, leave it alone. */
4295 if (numdigits > 1) {
4296 skipped = 1;
4297 --numdigits;
4298 }
4299 break;
4300 case 'x':
4301 case 'X':
4302 assert(buf[sign] == '0');
4303 assert(buf[sign + 1] == 'x');
4304 skipped = 2;
4305 numnondigits -= 2;
4306 break;
4307 }
4308 if (skipped) {
4309 buf += skipped;
4310 len -= skipped;
4311 if (sign)
4312 buf[0] = '-';
4313 }
4314 assert(len == numnondigits + numdigits);
4315 assert(numdigits > 0);
4316 }
4317
4318 /* Fill with leading zeroes to meet minimum width. */
4319 if (prec > numdigits) {
4320 PyObject *r1 = PyString_FromStringAndSize(NULL,
4321 numnondigits + prec);
4322 char *b1;
4323 if (!r1) {
4324 Py_DECREF(result);
4325 return NULL;
4326 }
4327 b1 = PyString_AS_STRING(r1);
4328 for (i = 0; i < numnondigits; ++i)
4329 *b1++ = *buf++;
4330 for (i = 0; i < prec - numdigits; i++)
4331 *b1++ = '0';
4332 for (i = 0; i < numdigits; i++)
4333 *b1++ = *buf++;
4334 *b1 = '\0';
4335 Py_DECREF(result);
4336 result = r1;
4337 buf = PyString_AS_STRING(result);
4338 len = numnondigits + prec;
4339 }
4340
4341 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004342 if (type == 'X') {
4343 /* Need to convert all lower case letters to upper case.
4344 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004345 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004346 if (buf[i] >= 'a' && buf[i] <= 'x')
4347 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004348 }
4349 *pbuf = buf;
4350 *plen = len;
4351 return result;
4352}
4353
Thomas Wouters477c8d52006-05-27 19:21:47 +00004354Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004355formatint(char *buf, size_t buflen, int flags,
4356 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004357{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004358 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004359 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4360 + 1 + 1 = 24 */
4361 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004362 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004363 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004364
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004365 x = PyInt_AsLong(v);
4366 if (x == -1 && PyErr_Occurred()) {
Thomas Wouters89f507f2006-12-13 04:49:30 +00004367 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4368 v->ob_type->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004369 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004370 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004371 if (x < 0 && type == 'u') {
4372 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004373 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004374 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4375 sign = "-";
4376 else
4377 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004378 if (prec < 0)
4379 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004380
4381 if ((flags & F_ALT) &&
4382 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004383 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004384 * of issues that cause pain:
4385 * - when 0 is being converted, the C standard leaves off
4386 * the '0x' or '0X', which is inconsistent with other
4387 * %#x/%#X conversions and inconsistent with Python's
4388 * hex() function
4389 * - there are platforms that violate the standard and
4390 * convert 0 with the '0x' or '0X'
4391 * (Metrowerks, Compaq Tru64)
4392 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004393 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004394 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004395 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004396 * We can achieve the desired consistency by inserting our
4397 * own '0x' or '0X' prefix, and substituting %x/%X in place
4398 * of %#x/%#X.
4399 *
4400 * Note that this is the same approach as used in
4401 * formatint() in unicodeobject.c
4402 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004403 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4404 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004405 }
4406 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004407 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4408 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004409 prec, type);
4410 }
4411
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004412 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4413 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004414 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004415 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004416 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004417 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004418 return -1;
4419 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004420 if (sign[0])
4421 PyOS_snprintf(buf, buflen, fmt, -x);
4422 else
4423 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004424 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004425}
4426
Thomas Wouters477c8d52006-05-27 19:21:47 +00004427Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004428formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004429{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004430 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004431 if (PyString_Check(v)) {
4432 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004433 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004434 }
4435 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004436 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004437 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004438 }
4439 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004440 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004441}
4442
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)

Guido van Rossume5372401993-03-16 12:15:04 +00004452
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004453PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004454PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004455{
4456 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004457 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004458 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004459 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004460 PyObject *result, *orig_args;
4461#ifdef Py_USING_UNICODE
4462 PyObject *v, *w;
4463#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004464 PyObject *dict = NULL;
4465 if (format == NULL || !PyString_Check(format) || args == NULL) {
4466 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004467 return NULL;
4468 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004469 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004470 fmt = PyString_AS_STRING(format);
4471 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004472 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004473 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004474 if (result == NULL)
4475 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004476 res = PyString_AsString(result);
4477 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004478 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004479 argidx = 0;
4480 }
4481 else {
4482 arglen = -1;
4483 argidx = -2;
4484 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004485 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4486 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004487 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004488 while (--fmtcnt >= 0) {
4489 if (*fmt != '%') {
4490 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004491 rescnt = fmtcnt + 100;
4492 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004493 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004494 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004495 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004496 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004497 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004498 }
4499 *res++ = *fmt++;
4500 }
4501 else {
4502 /* Got a format specifier */
4503 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004504 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004505 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004506 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004507 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004508 PyObject *v = NULL;
4509 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004510 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004511 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004512 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004513 char formatbuf[FORMATBUFLEN];
4514 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004515#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004516 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004517 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004518#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004519
Guido van Rossumda9c2711996-12-05 21:58:58 +00004520 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004521 if (*fmt == '(') {
4522 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004523 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004524 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004525 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004526
4527 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004528 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004529 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004530 goto error;
4531 }
4532 ++fmt;
4533 --fmtcnt;
4534 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004535 /* Skip over balanced parentheses */
4536 while (pcount > 0 && --fmtcnt >= 0) {
4537 if (*fmt == ')')
4538 --pcount;
4539 else if (*fmt == '(')
4540 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004541 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004542 }
4543 keylen = fmt - keystart - 1;
4544 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004545 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004546 "incomplete format key");
4547 goto error;
4548 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004549 key = PyString_FromStringAndSize(keystart,
4550 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004551 if (key == NULL)
4552 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004553 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004554 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004555 args_owned = 0;
4556 }
4557 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004558 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004559 if (args == NULL) {
4560 goto error;
4561 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004562 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004563 arglen = -1;
4564 argidx = -2;
4565 }
Guido van Rossume5372401993-03-16 12:15:04 +00004566 while (--fmtcnt >= 0) {
4567 switch (c = *fmt++) {
4568 case '-': flags |= F_LJUST; continue;
4569 case '+': flags |= F_SIGN; continue;
4570 case ' ': flags |= F_BLANK; continue;
4571 case '#': flags |= F_ALT; continue;
4572 case '0': flags |= F_ZERO; continue;
4573 }
4574 break;
4575 }
4576 if (c == '*') {
4577 v = getnextarg(args, arglen, &argidx);
4578 if (v == NULL)
4579 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004580 if (!PyInt_Check(v)) {
4581 PyErr_SetString(PyExc_TypeError,
4582 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004583 goto error;
4584 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004585 width = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004586 if (width == -1 && PyErr_Occurred())
4587 goto error;
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004588 if (width < 0) {
4589 flags |= F_LJUST;
4590 width = -width;
4591 }
Guido van Rossume5372401993-03-16 12:15:04 +00004592 if (--fmtcnt >= 0)
4593 c = *fmt++;
4594 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004595 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004596 width = c - '0';
4597 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004598 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004599 if (!isdigit(c))
4600 break;
4601 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004602 PyErr_SetString(
4603 PyExc_ValueError,
4604 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004605 goto error;
4606 }
4607 width = width*10 + (c - '0');
4608 }
4609 }
4610 if (c == '.') {
4611 prec = 0;
4612 if (--fmtcnt >= 0)
4613 c = *fmt++;
4614 if (c == '*') {
4615 v = getnextarg(args, arglen, &argidx);
4616 if (v == NULL)
4617 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004618 if (!PyInt_Check(v)) {
4619 PyErr_SetString(
4620 PyExc_TypeError,
4621 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004622 goto error;
4623 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004624 prec = PyInt_AsLong(v);
Guido van Rossumddefaf32007-01-14 03:31:43 +00004625 if (prec == -1 && PyErr_Occurred())
4626 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004627 if (prec < 0)
4628 prec = 0;
4629 if (--fmtcnt >= 0)
4630 c = *fmt++;
4631 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004632 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004633 prec = c - '0';
4634 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004635 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004636 if (!isdigit(c))
4637 break;
4638 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004639 PyErr_SetString(
4640 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004641 "prec too big");
4642 goto error;
4643 }
4644 prec = prec*10 + (c - '0');
4645 }
4646 }
4647 } /* prec */
4648 if (fmtcnt >= 0) {
4649 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004650 if (--fmtcnt >= 0)
4651 c = *fmt++;
4652 }
4653 }
4654 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004655 PyErr_SetString(PyExc_ValueError,
4656 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004657 goto error;
4658 }
4659 if (c != '%') {
4660 v = getnextarg(args, arglen, &argidx);
4661 if (v == NULL)
4662 goto error;
4663 }
4664 sign = 0;
4665 fill = ' ';
4666 switch (c) {
4667 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004668 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004669 len = 1;
4670 break;
4671 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004672#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004673 if (PyUnicode_Check(v)) {
4674 fmt = fmt_start;
4675 argidx = argidx_start;
4676 goto unicode;
4677 }
Georg Brandld45014b2005-10-01 17:06:00 +00004678#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004679 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004680#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004681 if (temp != NULL && PyUnicode_Check(temp)) {
4682 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004683 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004684 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004685 goto unicode;
4686 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004687#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004688 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004689 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004690 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004691 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004692 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004693 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004694 if (!PyString_Check(temp)) {
4695 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004696 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004697 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004698 goto error;
4699 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004700 pbuf = PyString_AS_STRING(temp);
4701 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004702 if (prec >= 0 && len > prec)
4703 len = prec;
4704 break;
4705 case 'i':
4706 case 'd':
4707 case 'u':
4708 case 'o':
4709 case 'x':
4710 case 'X':
4711 if (c == 'i')
4712 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004713 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004714 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004715 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004716 prec, c, &pbuf, &ilen);
4717 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004718 if (!temp)
4719 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004720 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004721 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004722 else {
4723 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004724 len = formatint(pbuf,
4725 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004726 flags, prec, c, v);
4727 if (len < 0)
4728 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004729 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004730 }
4731 if (flags & F_ZERO)
4732 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004733 break;
4734 case 'e':
4735 case 'E':
4736 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004737 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004738 case 'g':
4739 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004740 if (c == 'F')
4741 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004742 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004743 len = formatfloat(pbuf, sizeof(formatbuf),
4744 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004745 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004746 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004747 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004748 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004749 fill = '0';
4750 break;
4751 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004752#ifdef Py_USING_UNICODE
4753 if (PyUnicode_Check(v)) {
4754 fmt = fmt_start;
4755 argidx = argidx_start;
4756 goto unicode;
4757 }
4758#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004759 pbuf = formatbuf;
4760 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004761 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004762 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004763 break;
4764 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004765 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004766 "unsupported format character '%c' (0x%x) "
Thomas Wouters89f507f2006-12-13 04:49:30 +00004767 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004768 c, c,
Thomas Wouters89f507f2006-12-13 04:49:30 +00004769 (Py_ssize_t)(fmt - 1 -
4770 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004771 goto error;
4772 }
4773 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004774 if (*pbuf == '-' || *pbuf == '+') {
4775 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004776 len--;
4777 }
4778 else if (flags & F_SIGN)
4779 sign = '+';
4780 else if (flags & F_BLANK)
4781 sign = ' ';
4782 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004783 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004784 }
4785 if (width < len)
4786 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004787 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004788 reslen -= rescnt;
4789 rescnt = width + fmtcnt + 100;
4790 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004791 if (reslen < 0) {
4792 Py_DECREF(result);
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004793 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004794 return PyErr_NoMemory();
4795 }
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004796 if (_PyString_Resize(&result, reslen) < 0) {
4797 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004798 return NULL;
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004799 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004800 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004801 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004802 }
4803 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004804 if (fill != ' ')
4805 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004806 rescnt--;
4807 if (width > len)
4808 width--;
4809 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004810 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4811 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004812 assert(pbuf[1] == c);
4813 if (fill != ' ') {
4814 *res++ = *pbuf++;
4815 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004816 }
Tim Petersfff53252001-04-12 18:38:48 +00004817 rescnt -= 2;
4818 width -= 2;
4819 if (width < 0)
4820 width = 0;
4821 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004822 }
4823 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004824 do {
4825 --rescnt;
4826 *res++ = fill;
4827 } while (--width > len);
4828 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004829 if (fill == ' ') {
4830 if (sign)
4831 *res++ = sign;
4832 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004833 (c == 'x' || c == 'X')) {
4834 assert(pbuf[0] == '0');
4835 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004836 *res++ = *pbuf++;
4837 *res++ = *pbuf++;
4838 }
4839 }
Thomas Wouters4d70c3d2006-06-08 14:42:34 +00004840 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004841 res += len;
4842 rescnt -= len;
4843 while (--width >= len) {
4844 --rescnt;
4845 *res++ = ' ';
4846 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004847 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004848 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004849 "not all arguments converted during string formatting");
Georg Brandl6ce7d1e2007-02-26 13:48:28 +00004850 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004851 goto error;
4852 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004853 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004854 } /* '%' */
4855 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004856 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004857 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004858 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004859 goto error;
4860 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004861 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004862 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004863 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004864 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004865 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004866
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004867#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004868 unicode:
4869 if (args_owned) {
4870 Py_DECREF(args);
4871 args_owned = 0;
4872 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004873 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004874 if (PyTuple_Check(orig_args) && argidx > 0) {
4875 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004876 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004877 v = PyTuple_New(n);
4878 if (v == NULL)
4879 goto error;
4880 while (--n >= 0) {
4881 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4882 Py_INCREF(w);
4883 PyTuple_SET_ITEM(v, n, w);
4884 }
4885 args = v;
4886 } else {
4887 Py_INCREF(orig_args);
4888 args = orig_args;
4889 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004890 args_owned = 1;
4891 /* Take what we have of the result and let the Unicode formatting
4892 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004893 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004894 if (_PyString_Resize(&result, rescnt))
4895 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004896 fmtcnt = PyString_GET_SIZE(format) - \
4897 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004898 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4899 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004900 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004901 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004902 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004903 if (v == NULL)
4904 goto error;
4905 /* Paste what we have (result) to what the Unicode formatting
4906 function returned (v) and return the result (or error) */
4907 w = PyUnicode_Concat(result, v);
4908 Py_DECREF(result);
4909 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004910 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004911 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004912#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004913
Guido van Rossume5372401993-03-16 12:15:04 +00004914 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004915 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004916 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004917 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004918 }
Guido van Rossume5372401993-03-16 12:15:04 +00004919 return NULL;
4920}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004921
Guido van Rossum2a61e741997-01-18 07:55:05 +00004922void
Fred Drakeba096332000-07-09 07:04:36 +00004923PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004924{
4925 register PyStringObject *s = (PyStringObject *)(*p);
4926 PyObject *t;
4927 if (s == NULL || !PyString_Check(s))
4928 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004929 /* If it's a string subclass, we don't really know what putting
4930 it in the interned dict might do. */
4931 if (!PyString_CheckExact(s))
4932 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004933 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004934 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004935 if (interned == NULL) {
4936 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004937 if (interned == NULL) {
4938 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004939 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004940 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004941 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004942 t = PyDict_GetItem(interned, (PyObject *)s);
4943 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004944 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004945 Py_DECREF(*p);
4946 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004947 return;
4948 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004949
Armin Rigo79f7ad22004-08-07 19:27:39 +00004950 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004951 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004952 return;
4953 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004954 /* The two references in interned are not counted by refcnt.
4955 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004956 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004957 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004958}
4959
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004960void
4961PyString_InternImmortal(PyObject **p)
4962{
4963 PyString_InternInPlace(p);
4964 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4965 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4966 Py_INCREF(*p);
4967 }
4968}
4969
Guido van Rossum2a61e741997-01-18 07:55:05 +00004970
4971PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004972PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004973{
4974 PyObject *s = PyString_FromString(cp);
4975 if (s == NULL)
4976 return NULL;
4977 PyString_InternInPlace(&s);
4978 return s;
4979}
4980
Guido van Rossum8cf04761997-08-02 02:57:45 +00004981void
Fred Drakeba096332000-07-09 07:04:36 +00004982PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004983{
4984 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004985 for (i = 0; i < UCHAR_MAX + 1; i++) {
4986 Py_XDECREF(characters[i]);
4987 characters[i] = NULL;
4988 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004989 Py_XDECREF(nullstring);
4990 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004991}
Barry Warsawa903ad982001-02-23 16:40:48 +00004992
Barry Warsawa903ad982001-02-23 16:40:48 +00004993void _Py_ReleaseInternedStrings(void)
4994{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004995 PyObject *keys;
4996 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004997 Py_ssize_t i, n;
Thomas Wouters27d517b2007-02-25 20:39:11 +00004998 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004999
5000 if (interned == NULL || !PyDict_Check(interned))
5001 return;
5002 keys = PyDict_Keys(interned);
5003 if (keys == NULL || !PyList_Check(keys)) {
5004 PyErr_Clear();
5005 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005006 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005007
5008 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5009 detector, interned strings are not forcibly deallocated; rather, we
5010 give them their stolen references back, and then clear and DECREF
5011 the interned dict. */
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00005012
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005013 n = PyList_GET_SIZE(keys);
Thomas Wouters27d517b2007-02-25 20:39:11 +00005014 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5015 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005016 for (i = 0; i < n; i++) {
5017 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5018 switch (s->ob_sstate) {
5019 case SSTATE_NOT_INTERNED:
5020 /* XXX Shouldn't happen */
5021 break;
5022 case SSTATE_INTERNED_IMMORTAL:
5023 s->ob_refcnt += 1;
Thomas Wouters27d517b2007-02-25 20:39:11 +00005024 immortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005025 break;
5026 case SSTATE_INTERNED_MORTAL:
5027 s->ob_refcnt += 2;
Thomas Wouters27d517b2007-02-25 20:39:11 +00005028 mortal_size += s->ob_size;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005029 break;
5030 default:
5031 Py_FatalError("Inconsistent interned string state.");
5032 }
5033 s->ob_sstate = SSTATE_NOT_INTERNED;
5034 }
Thomas Wouters27d517b2007-02-25 20:39:11 +00005035 fprintf(stderr, "total size of all interned strings: "
5036 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5037 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005038 Py_DECREF(keys);
5039 PyDict_Clear(interned);
5040 Py_DECREF(interned);
5041 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005042}
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005043
5044
5045/*********************** Str Iterator ****************************/
5046
/* Iterator state for walking a string object one character at a time. */
typedef struct {
    PyObject_HEAD
    Py_ssize_t it_index;    /* Offset into it_seq of the next item to return */
    PyStringObject *it_seq; /* Set to NULL when iterator is exhausted */
} striterobject;
5052
5053static void
5054striter_dealloc(striterobject *it)
5055{
5056 _PyObject_GC_UNTRACK(it);
5057 Py_XDECREF(it->it_seq);
5058 PyObject_GC_Del(it);
5059}
5060
5061static int
5062striter_traverse(striterobject *it, visitproc visit, void *arg)
5063{
5064 Py_VISIT(it->it_seq);
5065 return 0;
5066}
5067
5068static PyObject *
5069striter_next(striterobject *it)
5070{
5071 PyStringObject *seq;
5072 PyObject *item;
5073
5074 assert(it != NULL);
5075 seq = it->it_seq;
5076 if (seq == NULL)
5077 return NULL;
5078 assert(PyString_Check(seq));
5079
5080 if (it->it_index < PyString_GET_SIZE(seq)) {
Guido van Rossum49d6b072006-08-17 21:11:47 +00005081 item = PyString_FromStringAndSize(
5082 PyString_AS_STRING(seq)+it->it_index, 1);
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005083 if (item != NULL)
5084 ++it->it_index;
5085 return item;
5086 }
5087
5088 Py_DECREF(seq);
5089 it->it_seq = NULL;
5090 return NULL;
5091}
5092
5093static PyObject *
5094striter_len(striterobject *it)
5095{
5096 Py_ssize_t len = 0;
5097 if (it->it_seq)
5098 len = PyString_GET_SIZE(it->it_seq) - it->it_index;
5099 return PyInt_FromSsize_t(len);
5100}
5101
/* Docstring attached to the iterator's __length_hint__ method. */
PyDoc_STRVAR(length_hint_doc,
    "Private method returning an estimate of len(list(it)).");
Guido van Rossum50e9fb92006-08-17 05:42:55 +00005104
/* Method table of the string iterator type: only __length_hint__,
   which takes no arguments and is implemented by striter_len(). */
static PyMethodDef striter_methods[] = {
    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
     length_hint_doc},
    {NULL, NULL}    /* sentinel */
};
5110
/* Type object of the string iterator ("striterator").  The slots are
   filled positionally, so the order of the entries below must not be
   changed.  The type participates in cyclic GC (Py_TPFLAGS_HAVE_GC
   with a traverse function) and returns itself from tp_iter. */
PyTypeObject PyStringIter_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                      /* ob_size */
    "striterator",                          /* tp_name */
    sizeof(striterobject),                  /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /* methods */
    (destructor)striter_dealloc,            /* tp_dealloc */
    0,                                      /* tp_print */
    0,                                      /* tp_getattr */
    0,                                      /* tp_setattr */
    0,                                      /* tp_compare */
    0,                                      /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    0,                                      /* tp_hash */
    0,                                      /* tp_call */
    0,                                      /* tp_str */
    PyObject_GenericGetAttr,                /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
    0,                                      /* tp_doc */
    (traverseproc)striter_traverse,         /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    PyObject_SelfIter,                      /* tp_iter */
    (iternextfunc)striter_next,             /* tp_iternext */
    striter_methods,                        /* tp_methods */
    0,  /* NOTE(review): slot following tp_methods, presumably
           tp_members -- confirm against the PyTypeObject layout */
};
5144
5145static PyObject *
5146str_iter(PyObject *seq)
5147{
5148 striterobject *it;
5149
5150 if (!PyString_Check(seq)) {
5151 PyErr_BadInternalCall();
5152 return NULL;
5153 }
5154 it = PyObject_GC_New(striterobject, &PyStringIter_Type);
5155 if (it == NULL)
5156 return NULL;
5157 it->it_index = 0;
5158 Py_INCREF(seq);
5159 it->it_seq = (PyStringObject *)seq;
5160 _PyObject_GC_TRACK(it);
5161 return (PyObject *)it;
5162}