blob: 21f59ac0f967105dbce011fa4c103c0ce7c8d844 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Martin v. Löwis5cb69362006-04-14 09:08:42 +00003#define PY_SSIZE_T_CLEAN
Fredrik Lundh7c940d12006-05-26 16:32:42 +00004
Guido van Rossumc0b618a1997-05-02 03:12:38 +00005#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00006
Guido van Rossum013142a1994-08-30 08:19:36 +00007#include <ctype.h>
8
#ifdef COUNT_ALLOCS
/* Statistics: how many times the shared empty string and a shared
   one-character string were returned by the constructors below. */
int null_strings, one_strings;
#endif

/* Cache of shared one-character string objects, indexed by byte value.
   An entry stays NULL until a string with that single byte is created. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string object; NULL until it is first created. */
static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000053PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000054{
Tim Peters9e897f42001-05-09 07:37:07 +000055 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000056 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000057 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000064 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000067#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000070 Py_INCREF(op);
71 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000072 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000073
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000074 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000075 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000076 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000078 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000080 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000081 if (str != NULL)
Fredrik Lundh80f8e802006-05-28 12:06:46 +000082 Py_MEMCPY(op->ob_sval, str, size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000083 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000084 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000086 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000088 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000091 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000092 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000094 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000095 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000096 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000098 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000099}
100
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000102PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000103{
Tim Peters62de65b2001-12-06 20:29:32 +0000104 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000105 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000106
107 assert(str != NULL);
108 size = strlen(str);
Martin v. Löwis8ce358f2006-04-13 07:22:51 +0000109 if (size > PY_SSIZE_T_MAX) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000118 Py_INCREF(op);
119 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 Py_INCREF(op);
126 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000128
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000129 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000133 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000134 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000135 op->ob_sstate = SSTATE_NOT_INTERNED;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000136 Py_MEMCPY(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000137 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000138 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000141 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000142 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000143 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000144 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000147 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000148 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000149 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000150 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000151 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000152}
153
Barry Warsawdadace02001-08-24 18:32:06 +0000154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
Tim Petersc15c4f12001-10-02 21:32:07 +0000157 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000158 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000159 const char* f;
160 char *s;
161 PyObject* string;
162
Tim Petersc15c4f12001-10-02 21:32:07 +0000163#ifdef VA_LIST_IS_ARRAY
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000164 Py_MEMCPY(count, vargs, sizeof(va_list));
Tim Petersc15c4f12001-10-02 21:32:07 +0000165#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000169 count = vargs;
170#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000171#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
Tim Peters8931ff12006-05-13 23:28:20 +0000179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
Tim Petersae1d0c92006-03-17 03:29:34 +0000184 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000185
Barry Warsawdadace02001-08-24 18:32:06 +0000186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000193 case 'd': case 'u': case 'i': case 'x':
Barry Warsawdadace02001-08-24 18:32:06 +0000194 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000209 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000219 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000233
Barry Warsawdadace02001-08-24 18:32:06 +0000234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000239 Py_ssize_t i;
240 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000241 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
Tim Peters8931ff12006-05-13 23:28:20 +0000255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
Barry Warsawdadace02001-08-24 18:32:06 +0000258 longflag = 1;
259 ++f;
260 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000261 /* handle the size_t flag. */
Tim Peters8931ff12006-05-13 23:28:20 +0000262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 size_tflag = 1;
264 ++f;
265 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000266
Barry Warsawdadace02001-08-24 18:32:06 +0000267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000274 else if (size_tflag)
Martin v. Löwis822f34a2006-05-13 13:34:04 +0000275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
Tim Peters8931ff12006-05-13 23:28:20 +0000281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
Barry Warsawdadace02001-08-24 18:32:06 +0000293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000306 Py_MEMCPY(s, p, i);
Barry Warsawdadace02001-08-24 18:32:06 +0000307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
Barry Warsawdadace02001-08-24 18:32:06 +0000319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000332
Barry Warsawdadace02001-08-24 18:32:06 +0000333 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000335 return string;
336}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000337
Barry Warsawdadace02001-08-24 18:32:06 +0000338PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000339PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000340{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000341 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000352}
353
354
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000355PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000356 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000357 const char *encoding,
358 const char *errors)
359{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000380
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000389
390 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000394
395 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000396
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000397 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000411#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000420#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000424 Py_TYPE(v)->tp_name);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000436 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000441
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000450PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000455
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000463 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000487 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000488 if (v == NULL)
489 goto onError;
490
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000491#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000500#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +0000504 Py_TYPE(v)->tp_name);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000505 Py_DECREF(v);
506 goto onError;
507 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000508
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000509 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000510
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000511 onError:
512 return NULL;
513}
514
Guido van Rossum234f9421993-06-17 12:35:49 +0000515static void
Fred Drakeba096332000-07-09 07:04:36 +0000516string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000517{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
Christian Heimese93237d2007-12-19 02:37:44 +0000524 Py_REFCNT(op) = 3;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
Christian Heimese93237d2007-12-19 02:37:44 +0000536 Py_TYPE(op)->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000537}
538
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000545 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000547 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000555 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000562 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000568 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
Fredrik Lundh80f8e802006-05-28 12:06:46 +0000585 Py_MEMCPY(p, r, rn);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000619 if (s < end && '0' <= *s && *s <= '7') {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000620 c = (c<<3) + *s++ - '0';
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000621 if (s < end && '0' <= *s && *s <= '7')
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
Guido van Rossum1c1ac382007-10-29 22:15:05 +0000627 if (s+1 < end &&
628 isxdigit(Py_CHARMASK(s[0])) &&
629 isxdigit(Py_CHARMASK(s[1])))
630 {
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000631 unsigned int x = 0;
632 c = Py_CHARMASK(*s);
633 s++;
634 if (isdigit(c))
635 x = c - '0';
636 else if (islower(c))
637 x = 10 + c - 'a';
638 else
639 x = 10 + c - 'A';
640 x = x << 4;
641 c = Py_CHARMASK(*s);
642 s++;
643 if (isdigit(c))
644 x += c - '0';
645 else if (islower(c))
646 x += 10 + c - 'a';
647 else
648 x += 10 + c - 'A';
649 *p++ = x;
650 break;
651 }
652 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000653 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000654 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000655 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000656 }
657 if (strcmp(errors, "replace") == 0) {
658 *p++ = '?';
659 } else if (strcmp(errors, "ignore") == 0)
660 /* do nothing */;
661 else {
662 PyErr_Format(PyExc_ValueError,
663 "decoding error; "
664 "unknown error handling code: %.400s",
665 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000666 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000667 }
668#ifndef Py_USING_UNICODE
669 case 'u':
670 case 'U':
671 case 'N':
672 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000673 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000674 "Unicode escapes not legal "
675 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000676 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000677 }
678#endif
679 default:
680 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000681 s--;
682 goto non_esc; /* an arbitry number of unescaped
683 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000684 }
685 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000686 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000687 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000688 return v;
689 failed:
690 Py_DECREF(v);
691 return NULL;
692}
693
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000694/* -------------------------------------------------------------------- */
695/* object api */
696
Martin v. Löwis18e16552006-02-15 17:27:45 +0000697static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000698string_getsize(register PyObject *op)
699{
700 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000701 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000702 if (PyString_AsStringAndSize(op, &s, &len))
703 return -1;
704 return len;
705}
706
707static /*const*/ char *
708string_getbuffer(register PyObject *op)
709{
710 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000711 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000712 if (PyString_AsStringAndSize(op, &s, &len))
713 return NULL;
714 return s;
715}
716
Martin v. Löwis18e16552006-02-15 17:27:45 +0000717Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000718PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000719{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000720 if (!PyString_Check(op))
721 return string_getsize(op);
Christian Heimese93237d2007-12-19 02:37:44 +0000722 return Py_SIZE(op);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000723}
724
725/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000726PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000727{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000728 if (!PyString_Check(op))
729 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000730 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000731}
732
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000733int
734PyString_AsStringAndSize(register PyObject *obj,
735 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000736 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000737{
738 if (s == NULL) {
739 PyErr_BadInternalCall();
740 return -1;
741 }
742
743 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000744#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000745 if (PyUnicode_Check(obj)) {
746 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
747 if (obj == NULL)
748 return -1;
749 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000750 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000751#endif
752 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000753 PyErr_Format(PyExc_TypeError,
754 "expected string or Unicode object, "
Christian Heimese93237d2007-12-19 02:37:44 +0000755 "%.200s found", Py_TYPE(obj)->tp_name);
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000756 return -1;
757 }
758 }
759
760 *s = PyString_AS_STRING(obj);
761 if (len != NULL)
762 *len = PyString_GET_SIZE(obj);
Skip Montanaro429433b2006-04-18 00:35:43 +0000763 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000764 PyErr_SetString(PyExc_TypeError,
765 "expected string without null bytes");
766 return -1;
767 }
768 return 0;
769}
770
Fredrik Lundhaf722372006-05-25 17:55:31 +0000771/* -------------------------------------------------------------------- */
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000772/* Methods */
Fredrik Lundhaf722372006-05-25 17:55:31 +0000773
/* Bind the generic STRINGLIB_* names to their 8-bit string equivalents.
   NOTE(review): presumably consumed by the stringlib headers included
   right after these definitions -- confirm against those headers. */
#define STRINGLIB_CHAR char

#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING

/* the shared empty string object defined at the top of this file */
#define STRINGLIB_EMPTY nullstring
Fredrik Lundhaf722372006-05-25 17:55:31 +0000782
Fredrik Lundha50d2012006-05-26 17:04:58 +0000783#include "stringlib/fastsearch.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000784
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +0000785#include "stringlib/count.h"
Fredrik Lundhe6e43c82006-05-26 19:48:07 +0000786#include "stringlib/find.h"
Fredrik Lundhb9479482006-05-26 17:22:38 +0000787#include "stringlib/partition.h"
Fredrik Lundhaf722372006-05-25 17:55:31 +0000788
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000789
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000790static int
Fred Drakeba096332000-07-09 07:04:36 +0000791string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792{
Brett Cannon01531592007-09-17 03:28:34 +0000793 Py_ssize_t i, str_len;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000794 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000795 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000796
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000797 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000798 if (! PyString_CheckExact(op)) {
799 int ret;
800 /* A str subclass may have its own __str__ method. */
801 op = (PyStringObject *) PyObject_Str((PyObject *)op);
802 if (op == NULL)
803 return -1;
804 ret = string_print(op, fp, flags);
805 Py_DECREF(op);
806 return ret;
807 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000808 if (flags & Py_PRINT_RAW) {
Armin Rigo7ccbca92006-10-04 12:17:45 +0000809 char *data = op->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +0000810 Py_ssize_t size = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000811 Py_BEGIN_ALLOW_THREADS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000812 while (size > INT_MAX) {
813 /* Very long strings cannot be written atomically.
814 * But don't write exactly INT_MAX bytes at a time
815 * to avoid memory aligment issues.
816 */
817 const int chunk_size = INT_MAX & ~0x3FFF;
818 fwrite(data, 1, chunk_size, fp);
819 data += chunk_size;
820 size -= chunk_size;
821 }
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000822#ifdef __VMS
Armin Rigo7ccbca92006-10-04 12:17:45 +0000823 if (size) fwrite(data, (int)size, 1, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000824#else
Armin Rigo7ccbca92006-10-04 12:17:45 +0000825 fwrite(data, 1, (int)size, fp);
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000826#endif
Brett Cannon01531592007-09-17 03:28:34 +0000827 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000828 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000829 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000830
Thomas Wouters7e474022000-07-16 12:04:32 +0000831 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 quote = '\'';
Christian Heimese93237d2007-12-19 02:37:44 +0000833 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
834 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000835 quote = '"';
836
Christian Heimese93237d2007-12-19 02:37:44 +0000837 str_len = Py_SIZE(op);
Brett Cannon01531592007-09-17 03:28:34 +0000838 Py_BEGIN_ALLOW_THREADS
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000840 for (i = 0; i < str_len; i++) {
841 /* Since strings are immutable and the caller should have a
842 reference, accessing the interal buffer should not be an issue
843 with the GIL released. */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000845 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000846 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000848 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000849 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000850 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000851 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000852 fprintf(fp, "\\r");
853 else if (c < ' ' || c >= 0x7f)
854 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000855 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000856 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000857 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000858 fputc(quote, fp);
Brett Cannon01531592007-09-17 03:28:34 +0000859 Py_END_ALLOW_THREADS
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000860 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000861}
862
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000863PyObject *
864PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000866 register PyStringObject* op = (PyStringObject*) obj;
Christian Heimese93237d2007-12-19 02:37:44 +0000867 size_t newsize = 2 + 4 * Py_SIZE(op);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000868 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +0000869 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000870 PyErr_SetString(PyExc_OverflowError,
871 "string is too large to make repr");
Guido van Rossum9b847b42007-11-06 23:32:56 +0000872 return NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000873 }
874 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000875 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000876 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000877 }
878 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000879 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000880 register char c;
881 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000882 int quote;
883
Thomas Wouters7e474022000-07-16 12:04:32 +0000884 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000885 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000886 if (smartquotes &&
Christian Heimese93237d2007-12-19 02:37:44 +0000887 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
888 !memchr(op->ob_sval, '"', Py_SIZE(op)))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000889 quote = '"';
890
Tim Peters9161c8b2001-12-03 01:55:38 +0000891 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000892 *p++ = quote;
Christian Heimese93237d2007-12-19 02:37:44 +0000893 for (i = 0; i < Py_SIZE(op); i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000894 /* There's at least enough room for a hex escape
895 and a closing quote. */
896 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000897 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000898 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000899 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000900 else if (c == '\t')
901 *p++ = '\\', *p++ = 't';
902 else if (c == '\n')
903 *p++ = '\\', *p++ = 'n';
904 else if (c == '\r')
905 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000906 else if (c < ' ' || c >= 0x7f) {
907 /* For performance, we don't want to call
908 PyOS_snprintf here (extra layers of
909 function call). */
910 sprintf(p, "\\x%02x", c & 0xff);
911 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000912 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000913 else
914 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000916 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000917 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000918 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000919 _PyString_Resize(
Thomas Wouters568f1d02006-04-21 13:54:43 +0000920 &v, (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000921 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000922 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000923}
924
Guido van Rossum189f1df2001-05-01 16:51:53 +0000925static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000926string_repr(PyObject *op)
927{
928 return PyString_Repr(op, 1);
929}
930
931static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000932string_str(PyObject *s)
933{
Tim Petersc9933152001-10-16 20:18:24 +0000934 assert(PyString_Check(s));
935 if (PyString_CheckExact(s)) {
936 Py_INCREF(s);
937 return s;
938 }
939 else {
940 /* Subtype -- return genuine string with the same value. */
941 PyStringObject *t = (PyStringObject *) s;
Christian Heimese93237d2007-12-19 02:37:44 +0000942 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
Tim Petersc9933152001-10-16 20:18:24 +0000943 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000944}
945
Martin v. Löwis18e16552006-02-15 17:27:45 +0000946static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000947string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000948{
Christian Heimese93237d2007-12-19 02:37:44 +0000949 return Py_SIZE(a);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000950}
951
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000952static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000953string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954{
Andrew Dalke598710c2006-05-25 18:18:39 +0000955 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000956 register PyStringObject *op;
957 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000958#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000959 if (PyUnicode_Check(bb))
960 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000961#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000962 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000963 "cannot concatenate 'str' and '%.200s' objects",
Christian Heimese93237d2007-12-19 02:37:44 +0000964 Py_TYPE(bb)->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000965 return NULL;
966 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000967#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000968 /* Optimize cases with empty left or right operand */
Christian Heimese93237d2007-12-19 02:37:44 +0000969 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
Tim Peters8fa5dd02001-09-12 02:18:30 +0000970 PyString_CheckExact(a) && PyString_CheckExact(b)) {
Christian Heimese93237d2007-12-19 02:37:44 +0000971 if (Py_SIZE(a) == 0) {
Tim Peters8fa5dd02001-09-12 02:18:30 +0000972 Py_INCREF(bb);
973 return bb;
974 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000975 Py_INCREF(a);
976 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000977 }
Christian Heimese93237d2007-12-19 02:37:44 +0000978 size = Py_SIZE(a) + Py_SIZE(b);
Andrew Dalke598710c2006-05-25 18:18:39 +0000979 if (size < 0) {
980 PyErr_SetString(PyExc_OverflowError,
981 "strings are too large to concat");
982 return NULL;
983 }
984
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000985 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000986 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000987 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000988 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000989 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000990 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000991 op->ob_sstate = SSTATE_NOT_INTERNED;
Christian Heimese93237d2007-12-19 02:37:44 +0000992 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
993 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000994 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000995 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000996#undef b
997}
998
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000999static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001000string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001002 register Py_ssize_t i;
1003 register Py_ssize_t j;
1004 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001005 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +00001006 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001007 if (n < 0)
1008 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +00001009 /* watch out for overflows: the size can overflow int,
1010 * and the # of bytes needed can overflow size_t
1011 */
Christian Heimese93237d2007-12-19 02:37:44 +00001012 size = Py_SIZE(a) * n;
1013 if (n && size / n != Py_SIZE(a)) {
Tim Peters8f422462000-09-09 06:13:41 +00001014 PyErr_SetString(PyExc_OverflowError,
1015 "repeated string is too long");
1016 return NULL;
1017 }
Christian Heimese93237d2007-12-19 02:37:44 +00001018 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001019 Py_INCREF(a);
1020 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001021 }
Tim Peterse7c05322004-06-27 17:24:49 +00001022 nbytes = (size_t)size;
1023 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +00001024 PyErr_SetString(PyExc_OverflowError,
1025 "repeated string is too long");
1026 return NULL;
1027 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001028 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00001029 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +00001030 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001031 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +00001032 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001033 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00001034 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001035 op->ob_sval[size] = '\0';
Christian Heimese93237d2007-12-19 02:37:44 +00001036 if (Py_SIZE(a) == 1 && n > 0) {
Raymond Hettinger0a2f8492003-01-06 22:42:41 +00001037 memset(op->ob_sval, a->ob_sval[0] , n);
1038 return (PyObject *) op;
1039 }
Raymond Hettinger698258a2003-01-06 10:33:56 +00001040 i = 0;
1041 if (i < size) {
Christian Heimese93237d2007-12-19 02:37:44 +00001042 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1043 i = Py_SIZE(a);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001044 }
1045 while (i < size) {
1046 j = (i <= size-i) ? i : size-i;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001047 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
Raymond Hettinger698258a2003-01-06 10:33:56 +00001048 i += j;
1049 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001050 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001051}
1052
1053/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1054
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001055static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +00001056string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +00001057 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001058 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001059{
1060 if (i < 0)
1061 i = 0;
1062 if (j < 0)
1063 j = 0; /* Avoid signed/unsigned bug in next line */
Christian Heimese93237d2007-12-19 02:37:44 +00001064 if (j > Py_SIZE(a))
1065 j = Py_SIZE(a);
1066 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00001067 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001068 Py_INCREF(a);
1069 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070 }
1071 if (j < i)
1072 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001073 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001074}
1075
Guido van Rossum9284a572000-03-07 15:53:43 +00001076static int
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001077string_contains(PyObject *str_obj, PyObject *sub_obj)
Guido van Rossum9284a572000-03-07 15:53:43 +00001078{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001079 if (!PyString_CheckExact(sub_obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001080#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001081 if (PyUnicode_Check(sub_obj))
1082 return PyUnicode_Contains(str_obj, sub_obj);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001083#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001084 if (!PyString_Check(sub_obj)) {
Georg Brandl283a1352006-11-19 08:48:30 +00001085 PyErr_Format(PyExc_TypeError,
1086 "'in <string>' requires string as left operand, "
Christian Heimese93237d2007-12-19 02:37:44 +00001087 "not %.200s", Py_TYPE(sub_obj)->tp_name);
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001088 return -1;
1089 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001090 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001091
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001092 return stringlib_contains_obj(str_obj, sub_obj);
Guido van Rossum9284a572000-03-07 15:53:43 +00001093}
1094
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001095static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001096string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001097{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001098 char pchar;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001099 PyObject *v;
Christian Heimese93237d2007-12-19 02:37:44 +00001100 if (i < 0 || i >= Py_SIZE(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001101 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001102 return NULL;
1103 }
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001104 pchar = a->ob_sval[i];
1105 v = (PyObject *)characters[pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001106 if (v == NULL)
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001107 v = PyString_FromStringAndSize(&pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001108 else {
1109#ifdef COUNT_ALLOCS
1110 one_strings++;
1111#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001112 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001113 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001114 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001115}
1116
Martin v. Löwiscd353062001-05-24 16:56:35 +00001117static PyObject*
1118string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001119{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001120 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001121 Py_ssize_t len_a, len_b;
1122 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001123 PyObject *result;
1124
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001125 /* Make sure both arguments are strings. */
1126 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001127 result = Py_NotImplemented;
1128 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001129 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001130 if (a == b) {
1131 switch (op) {
1132 case Py_EQ:case Py_LE:case Py_GE:
1133 result = Py_True;
1134 goto out;
1135 case Py_NE:case Py_LT:case Py_GT:
1136 result = Py_False;
1137 goto out;
1138 }
1139 }
1140 if (op == Py_EQ) {
1141 /* Supporting Py_NE here as well does not save
1142 much time, since Py_NE is rarely used. */
Christian Heimese93237d2007-12-19 02:37:44 +00001143 if (Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001144 && (a->ob_sval[0] == b->ob_sval[0]
Christian Heimese93237d2007-12-19 02:37:44 +00001145 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001146 result = Py_True;
1147 } else {
1148 result = Py_False;
1149 }
1150 goto out;
1151 }
Christian Heimese93237d2007-12-19 02:37:44 +00001152 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
Martin v. Löwiscd353062001-05-24 16:56:35 +00001153 min_len = (len_a < len_b) ? len_a : len_b;
1154 if (min_len > 0) {
1155 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1156 if (c==0)
1157 c = memcmp(a->ob_sval, b->ob_sval, min_len);
Neal Norwitz7218c2d2007-02-25 15:53:36 +00001158 } else
Martin v. Löwiscd353062001-05-24 16:56:35 +00001159 c = 0;
1160 if (c == 0)
1161 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1162 switch (op) {
1163 case Py_LT: c = c < 0; break;
1164 case Py_LE: c = c <= 0; break;
1165 case Py_EQ: assert(0); break; /* unreachable */
1166 case Py_NE: c = c != 0; break;
1167 case Py_GT: c = c > 0; break;
1168 case Py_GE: c = c >= 0; break;
1169 default:
1170 result = Py_NotImplemented;
1171 goto out;
1172 }
1173 result = c ? Py_True : Py_False;
1174 out:
1175 Py_INCREF(result);
1176 return result;
1177}
1178
1179int
1180_PyString_Eq(PyObject *o1, PyObject *o2)
1181{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00001182 PyStringObject *a = (PyStringObject*) o1;
1183 PyStringObject *b = (PyStringObject*) o2;
Christian Heimese93237d2007-12-19 02:37:44 +00001184 return Py_SIZE(a) == Py_SIZE(b)
Martin v. Löwiscd353062001-05-24 16:56:35 +00001185 && *a->ob_sval == *b->ob_sval
Christian Heimese93237d2007-12-19 02:37:44 +00001186 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001187}
1188
Guido van Rossum9bfef441993-03-29 10:43:31 +00001189static long
Fred Drakeba096332000-07-09 07:04:36 +00001190string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001191{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001192 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001193 register unsigned char *p;
1194 register long x;
1195
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001196 if (a->ob_shash != -1)
1197 return a->ob_shash;
Christian Heimese93237d2007-12-19 02:37:44 +00001198 len = Py_SIZE(a);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001199 p = (unsigned char *) a->ob_sval;
1200 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001201 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001202 x = (1000003*x) ^ *p++;
Christian Heimese93237d2007-12-19 02:37:44 +00001203 x ^= Py_SIZE(a);
Guido van Rossum9bfef441993-03-29 10:43:31 +00001204 if (x == -1)
1205 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001206 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001207 return x;
1208}
1209
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001210static PyObject*
1211string_subscript(PyStringObject* self, PyObject* item)
1212{
Neal Norwitz8a87f5d2006-08-12 17:03:09 +00001213 if (PyIndex_Check(item)) {
1214 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001215 if (i == -1 && PyErr_Occurred())
1216 return NULL;
1217 if (i < 0)
1218 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001219 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001220 }
1221 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001222 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 char* source_buf;
1224 char* result_buf;
1225 PyObject* result;
1226
Tim Petersae1d0c92006-03-17 03:29:34 +00001227 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 PyString_GET_SIZE(self),
1229 &start, &stop, &step, &slicelength) < 0) {
1230 return NULL;
1231 }
1232
1233 if (slicelength <= 0) {
1234 return PyString_FromStringAndSize("", 0);
1235 }
Thomas Wouters3ccec682007-08-28 15:28:19 +00001236 else if (start == 0 && step == 1 &&
1237 slicelength == PyString_GET_SIZE(self) &&
1238 PyString_CheckExact(self)) {
1239 Py_INCREF(self);
1240 return (PyObject *)self;
1241 }
1242 else if (step == 1) {
1243 return PyString_FromStringAndSize(
1244 PyString_AS_STRING(self) + start,
1245 slicelength);
1246 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001247 else {
1248 source_buf = PyString_AsString((PyObject*)self);
Anthony Baxtera6286212006-04-11 07:42:36 +00001249 result_buf = (char *)PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001250 if (result_buf == NULL)
1251 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001252
Tim Petersae1d0c92006-03-17 03:29:34 +00001253 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001254 cur += step, i++) {
1255 result_buf[i] = source_buf[cur];
1256 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001257
1258 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001259 slicelength);
1260 PyMem_Free(result_buf);
1261 return result;
1262 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001263 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001264 else {
Georg Brandl283a1352006-11-19 08:48:30 +00001265 PyErr_Format(PyExc_TypeError,
1266 "string indices must be integers, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00001267 Py_TYPE(item)->tp_name);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001268 return NULL;
1269 }
1270}
1271
Martin v. Löwis18e16552006-02-15 17:27:45 +00001272static Py_ssize_t
1273string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001274{
1275 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001276 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001277 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001278 return -1;
1279 }
1280 *ptr = (void *)self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001281 return Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001282}
1283
Martin v. Löwis18e16552006-02-15 17:27:45 +00001284static Py_ssize_t
1285string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001286{
Guido van Rossum045e6881997-09-08 18:30:11 +00001287 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001288 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001289 return -1;
1290}
1291
Martin v. Löwis18e16552006-02-15 17:27:45 +00001292static Py_ssize_t
1293string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001294{
1295 if ( lenp )
Christian Heimese93237d2007-12-19 02:37:44 +00001296 *lenp = Py_SIZE(self);
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297 return 1;
1298}
1299
Martin v. Löwis18e16552006-02-15 17:27:45 +00001300static Py_ssize_t
1301string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001302{
1303 if ( index != 0 ) {
1304 PyErr_SetString(PyExc_SystemError,
1305 "accessing non-existent string segment");
1306 return -1;
1307 }
1308 *ptr = self->ob_sval;
Christian Heimese93237d2007-12-19 02:37:44 +00001309 return Py_SIZE(self);
Guido van Rossum1db70701998-10-08 02:18:52 +00001310}
1311
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001312static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001313 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001314 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001315 (ssizeargfunc)string_repeat, /*sq_repeat*/
1316 (ssizeargfunc)string_item, /*sq_item*/
1317 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001318 0, /*sq_ass_item*/
1319 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001320 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001321};
1322
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001323static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001324 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001325 (binaryfunc)string_subscript,
1326 0,
1327};
1328
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001329static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001330 (readbufferproc)string_buffer_getreadbuf,
1331 (writebufferproc)string_buffer_getwritebuf,
1332 (segcountproc)string_buffer_getsegcount,
1333 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001334};
1335
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336
1337
/* Selectors for which end(s) of a string a strip operation works on.
   They double as indices into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Array indexed by the selectors above.  Each entry is "|O:" followed by
   the method name (presumably an argument-parsing format string --
   confirm at the call sites).  Both the characters and the pointers are
   const, so the table cannot be modified at run time. */
static const char * const stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Bare method name for selector i: skips the 3-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001346
Andrew Dalke525eab32006-05-26 14:00:45 +00001347
/* Nonzero when the `length' bytes of `target' starting at `offset' equal
   the first `length' bytes of `pattern'.  The first and last bytes are
   compared directly and memcmp() handles the length-2 bytes in between.

   Don't call if length < 2: the memcmp() count would become negative.

   Every parameter is parenthesized so that expression arguments (for
   example `buf + n' or `a ? b : c') expand correctly.  Arguments are
   evaluated more than once, so they must be free of side effects. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1353
1354
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* Number of list slots to preallocate for a split limited to `maxsplit'
   splits: maxsplit+1 elements (5 splits gives 6 elements), capped at
   MAX_PREALLOC.  The parameter is parenthesized so that an expression
   argument such as `a ? b : c' is not re-associated by the expansion. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
1367
/* Append the substring data[left:right] to the result list.

   Expands to a sequence of statements (not a single statement) and
   relies on names supplied by the enclosing function: `str' (scratch
   PyObject *), `list' (the result list) and an `onError' label that is
   jumped to when creating or appending the substring fails.  The local
   reference in `str' is released on both the success and failure path
   of the append. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str) != 0) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);
1379
/* Add the substring data[left:right] as element number `count' of the
   result list, then increment `count'.

   While count is below MAX_PREALLOC the new string is stored directly
   into the slot with PyList_SET_ITEM() (no Py_DECREF follows on that
   path); after that it is appended with PyList_Append() and the local
   reference is released.

   Relies on names supplied by the enclosing function: `str', `list',
   `count' and an `onError' label used when creating or appending the
   substring fails. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    else if (PyList_Append(list, str) == 0) {                   \
        Py_DECREF(str);                                         \
    }                                                           \
    else {                                                      \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    count++; }
Andrew Dalke525eab32006-05-26 14:00:45 +00001396
/* Always force the list to the expected size: overwrite the list's
   recorded size with `count' (a variable of the enclosing function),
   i.e. the number of elements actually filled in. */
#define FIX_PREALLOC_SIZE(list) (Py_SIZE(list) = count)
Andrew Dalke525eab32006-05-26 14:00:45 +00001399
/* Index-advancing helpers for whitespace splitting.  `i` must be a
   modifiable index variable; it is updated in place.

   SKIP_SPACE / SKIP_NONSPACE move `i` forward, stopping at `len` or at the
   first byte that is not / is whitespace according to isspace().
   RSKIP_SPACE / RSKIP_NONSPACE move `i` backward in the same way, stopping
   once `i` drops below 0. */
#define SKIP_SPACE(s, i, len) \
    { for (; i < len; i++) if (!isspace(Py_CHARMASK(s[i]))) break; }
#define SKIP_NONSPACE(s, i, len) \
    { for (; i < len; i++) if (isspace(Py_CHARMASK(s[i]))) break; }
#define RSKIP_SPACE(s, i) \
    { for (; i >= 0; i--) if (!isspace(Py_CHARMASK(s[i]))) break; }
#define RSKIP_NONSPACE(s, i) \
    { for (; i >= 0; i--) if (isspace(Py_CHARMASK(s[i]))) break; }
1404
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001405Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001406split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407{
Skip Montanaro26015492007-12-08 15:33:24 +00001408 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001409 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001410 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001411 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412
1413 if (list == NULL)
1414 return NULL;
1415
Andrew Dalke02758d62006-05-26 15:21:01 +00001416 i = j = 0;
1417
1418 while (maxsplit-- > 0) {
1419 SKIP_SPACE(s, i, len);
1420 if (i==len) break;
1421 j = i; i++;
1422 SKIP_NONSPACE(s, i, len);
Skip Montanaro26015492007-12-08 15:33:24 +00001423 if (j == 0 && i == len && PyString_CheckExact(self)) {
1424 /* No whitespace in self, so just use it as list[0] */
1425 Py_INCREF(self);
1426 PyList_SET_ITEM(list, 0, (PyObject *)self);
1427 count++;
1428 break;
1429 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001430 SPLIT_ADD(s, j, i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001432
1433 if (i < len) {
1434 /* Only occurs when maxsplit was reached */
1435 /* Skip any remaining whitespace and copy to end of string */
1436 SKIP_SPACE(s, i, len);
1437 if (i != len)
1438 SPLIT_ADD(s, i, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001439 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001440 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001442 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001443 Py_DECREF(list);
1444 return NULL;
1445}
1446
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001447Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001448split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001449{
Skip Montanaro26015492007-12-08 15:33:24 +00001450 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001451 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001452 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001453 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001454
1455 if (list == NULL)
1456 return NULL;
1457
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001458 i = j = 0;
1459 while ((j < len) && (maxcount-- > 0)) {
1460 for(; j<len; j++) {
1461 /* I found that using memchr makes no difference */
1462 if (s[j] == ch) {
1463 SPLIT_ADD(s, i, j);
1464 i = j = j + 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001465 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001466 }
1467 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001468 }
Skip Montanaro26015492007-12-08 15:33:24 +00001469 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1470 /* ch not in self, so just use self as list[0] */
1471 Py_INCREF(self);
1472 PyList_SET_ITEM(list, 0, (PyObject *)self);
1473 count++;
1474 }
1475 else if (i <= len) {
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001476 SPLIT_ADD(s, i, len);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001477 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001478 FIX_PREALLOC_SIZE(list);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001479 return list;
1480
1481 onError:
1482 Py_DECREF(list);
1483 return NULL;
1484}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001485
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001486PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001487"S.split([sep [,maxsplit]]) -> list of strings\n\
1488\n\
1489Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001490delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001491splits are done. If sep is not specified or is None, any\n\
1492whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493
1494static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001495string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001497 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001498 Py_ssize_t maxsplit = -1, count=0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499 const char *s = PyString_AS_STRING(self), *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001500 PyObject *list, *str, *subobj = Py_None;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001501#ifdef USE_FAST
1502 Py_ssize_t pos;
1503#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504
Martin v. Löwis9c830762006-04-13 08:37:17 +00001505 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001507 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001508 maxsplit = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001509 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001510 return split_whitespace(self, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001511 if (PyString_Check(subobj)) {
1512 sub = PyString_AS_STRING(subobj);
1513 n = PyString_GET_SIZE(subobj);
1514 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001515#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001516 else if (PyUnicode_Check(subobj))
1517 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001518#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001519 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1520 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001521
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 if (n == 0) {
1523 PyErr_SetString(PyExc_ValueError, "empty separator");
1524 return NULL;
1525 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001526 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001527 return split_char(self, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001528
Andrew Dalke525eab32006-05-26 14:00:45 +00001529 list = PyList_New(PREALLOC_SIZE(maxsplit));
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530 if (list == NULL)
1531 return NULL;
1532
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001533#ifdef USE_FAST
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001534 i = j = 0;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001535 while (maxsplit-- > 0) {
1536 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1537 if (pos < 0)
1538 break;
1539 j = i+pos;
1540 SPLIT_ADD(s, i, j);
1541 i = j + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542 }
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001543#else
1544 i = j = 0;
1545 while ((j+n <= len) && (maxsplit-- > 0)) {
1546 for (; j+n <= len; j++) {
1547 if (Py_STRING_MATCH(s, j, sub, n)) {
1548 SPLIT_ADD(s, i, j);
1549 i = j = j + n;
1550 break;
1551 }
1552 }
1553 }
1554#endif
1555 SPLIT_ADD(s, i, len);
Andrew Dalke525eab32006-05-26 14:00:45 +00001556 FIX_PREALLOC_SIZE(list);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001557 return list;
1558
Andrew Dalke525eab32006-05-26 14:00:45 +00001559 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001560 Py_DECREF(list);
1561 return NULL;
1562}
1563
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001564PyDoc_STRVAR(partition__doc__,
1565"S.partition(sep) -> (head, sep, tail)\n\
1566\n\
1567Searches for the separator sep in S, and returns the part before it,\n\
1568the separator itself, and the part after it. If the separator is not\n\
1569found, returns S and two empty strings.");
1570
1571static PyObject *
Fredrik Lundh450277f2006-05-26 09:46:59 +00001572string_partition(PyStringObject *self, PyObject *sep_obj)
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001573{
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001574 const char *sep;
1575 Py_ssize_t sep_len;
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001576
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001577 if (PyString_Check(sep_obj)) {
1578 sep = PyString_AS_STRING(sep_obj);
1579 sep_len = PyString_GET_SIZE(sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001580 }
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001581#ifdef Py_USING_UNICODE
1582 else if (PyUnicode_Check(sep_obj))
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001583 return PyUnicode_Partition((PyObject *) self, sep_obj);
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001584#endif
Fredrik Lundh06a69dd2006-05-26 08:54:28 +00001585 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001586 return NULL;
1587
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001588 return stringlib_partition(
Fredrik Lundhc2032fb2006-05-26 17:26:39 +00001589 (PyObject*) self,
1590 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1591 sep_obj, sep, sep_len
1592 );
Fredrik Lundhfe5bb7e2006-05-25 23:27:53 +00001593}
1594
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001595PyDoc_STRVAR(rpartition__doc__,
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001596"S.rpartition(sep) -> (tail, sep, head)\n\
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001597\n\
1598Searches for the separator sep in S, starting at the end of S, and returns\n\
1599the part before it, the separator itself, and the part after it. If the\n\
Raymond Hettingera0c95fa2006-09-04 15:32:48 +00001600separator is not found, returns two empty strings and S.");
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001601
1602static PyObject *
1603string_rpartition(PyStringObject *self, PyObject *sep_obj)
1604{
1605 const char *sep;
1606 Py_ssize_t sep_len;
1607
1608 if (PyString_Check(sep_obj)) {
1609 sep = PyString_AS_STRING(sep_obj);
1610 sep_len = PyString_GET_SIZE(sep_obj);
1611 }
1612#ifdef Py_USING_UNICODE
1613 else if (PyUnicode_Check(sep_obj))
1614 return PyUnicode_Partition((PyObject *) self, sep_obj);
1615#endif
1616 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1617 return NULL;
1618
Fredrik Lundh58b5e842006-05-26 19:24:53 +00001619 return stringlib_rpartition(
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00001620 (PyObject*) self,
1621 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1622 sep_obj, sep, sep_len
1623 );
1624}
1625
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001626Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001627rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001628{
Skip Montanaro26015492007-12-08 15:33:24 +00001629 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001630 Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001631 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001632 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001633
1634 if (list == NULL)
1635 return NULL;
1636
Andrew Dalke02758d62006-05-26 15:21:01 +00001637 i = j = len-1;
Neal Norwitza7edb112006-07-30 06:59:13 +00001638
Andrew Dalke02758d62006-05-26 15:21:01 +00001639 while (maxsplit-- > 0) {
1640 RSKIP_SPACE(s, i);
1641 if (i<0) break;
1642 j = i; i--;
1643 RSKIP_NONSPACE(s, i);
Skip Montanaro26015492007-12-08 15:33:24 +00001644 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1645 /* No whitespace in self, so just use it as list[0] */
1646 Py_INCREF(self);
1647 PyList_SET_ITEM(list, 0, (PyObject *)self);
1648 count++;
1649 break;
1650 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001651 SPLIT_ADD(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001652 }
Andrew Dalke02758d62006-05-26 15:21:01 +00001653 if (i >= 0) {
1654 /* Only occurs when maxsplit was reached */
1655 /* Skip any remaining whitespace and copy to beginning of string */
1656 RSKIP_SPACE(s, i);
1657 if (i >= 0)
1658 SPLIT_ADD(s, 0, i + 1);
1659
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001660 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001661 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001662 if (PyList_Reverse(list) < 0)
1663 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001664 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001665 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001666 Py_DECREF(list);
1667 return NULL;
1668}
1669
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001670Py_LOCAL_INLINE(PyObject *)
Skip Montanaro26015492007-12-08 15:33:24 +00001671rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001672{
Skip Montanaro26015492007-12-08 15:33:24 +00001673 const char *s = PyString_AS_STRING(self);
Andrew Dalke525eab32006-05-26 14:00:45 +00001674 register Py_ssize_t i, j, count=0;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001675 PyObject *str;
Andrew Dalke525eab32006-05-26 14:00:45 +00001676 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001677
1678 if (list == NULL)
1679 return NULL;
1680
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001681 i = j = len - 1;
1682 while ((i >= 0) && (maxcount-- > 0)) {
1683 for (; i >= 0; i--) {
1684 if (s[i] == ch) {
1685 SPLIT_ADD(s, i + 1, j + 1);
1686 j = i = i - 1;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001687 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001688 }
1689 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001690 }
Skip Montanaro26015492007-12-08 15:33:24 +00001691 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1692 /* ch not in self, so just use self as list[0] */
1693 Py_INCREF(self);
1694 PyList_SET_ITEM(list, 0, (PyObject *)self);
1695 count++;
1696 }
1697 else if (j >= -1) {
Andrew Dalke525eab32006-05-26 14:00:45 +00001698 SPLIT_ADD(s, 0, j + 1);
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001699 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001700 FIX_PREALLOC_SIZE(list);
Fredrik Lundh554da412006-05-25 19:19:05 +00001701 if (PyList_Reverse(list) < 0)
1702 goto onError;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001703 return list;
1704
1705 onError:
1706 Py_DECREF(list);
1707 return NULL;
1708}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001709
1710PyDoc_STRVAR(rsplit__doc__,
1711"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1712\n\
1713Return a list of the words in the string S, using sep as the\n\
1714delimiter string, starting at the end of the string and working\n\
1715to the front. If maxsplit is given, at most maxsplit splits are\n\
1716done. If sep is not specified or is None, any whitespace string\n\
1717is a separator.");
1718
1719static PyObject *
1720string_rsplit(PyStringObject *self, PyObject *args)
1721{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001722 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
Andrew Dalke525eab32006-05-26 14:00:45 +00001723 Py_ssize_t maxsplit = -1, count=0;
Skip Montanaro26015492007-12-08 15:33:24 +00001724 const char *s, *sub;
Andrew Dalke525eab32006-05-26 14:00:45 +00001725 PyObject *list, *str, *subobj = Py_None;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001726
Martin v. Löwis9c830762006-04-13 08:37:17 +00001727 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001728 return NULL;
1729 if (maxsplit < 0)
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001730 maxsplit = PY_SSIZE_T_MAX;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001731 if (subobj == Py_None)
Skip Montanaro26015492007-12-08 15:33:24 +00001732 return rsplit_whitespace(self, len, maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001733 if (PyString_Check(subobj)) {
1734 sub = PyString_AS_STRING(subobj);
1735 n = PyString_GET_SIZE(subobj);
1736 }
1737#ifdef Py_USING_UNICODE
1738 else if (PyUnicode_Check(subobj))
1739 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1740#endif
1741 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1742 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001743
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001744 if (n == 0) {
1745 PyErr_SetString(PyExc_ValueError, "empty separator");
1746 return NULL;
1747 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001748 else if (n == 1)
Skip Montanaro26015492007-12-08 15:33:24 +00001749 return rsplit_char(self, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001750
Andrew Dalke525eab32006-05-26 14:00:45 +00001751 list = PyList_New(PREALLOC_SIZE(maxsplit));
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001752 if (list == NULL)
1753 return NULL;
1754
1755 j = len;
1756 i = j - n;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001757
Skip Montanaro26015492007-12-08 15:33:24 +00001758 s = PyString_AS_STRING(self);
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001759 while ( (i >= 0) && (maxsplit-- > 0) ) {
1760 for (; i>=0; i--) {
1761 if (Py_STRING_MATCH(s, i, sub, n)) {
1762 SPLIT_ADD(s, i + n, j);
1763 j = i;
1764 i -= n;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001765 break;
Andrew Dalkec5da53b2006-05-26 19:02:09 +00001766 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001767 }
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001768 }
Andrew Dalke525eab32006-05-26 14:00:45 +00001769 SPLIT_ADD(s, 0, j);
1770 FIX_PREALLOC_SIZE(list);
1771 if (PyList_Reverse(list) < 0)
1772 goto onError;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001773 return list;
1774
Andrew Dalke525eab32006-05-26 14:00:45 +00001775onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001776 Py_DECREF(list);
1777 return NULL;
1778}
1779
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001781PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782"S.join(sequence) -> string\n\
1783\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001784Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001785sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786
1787static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001788string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789{
1790 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001791 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001793 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001794 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001795 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001796 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001797 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001798
Tim Peters19fe14e2001-01-19 03:03:47 +00001799 seq = PySequence_Fast(orig, "");
1800 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001801 return NULL;
1802 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001803
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001804 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001805 if (seqlen == 0) {
1806 Py_DECREF(seq);
1807 return PyString_FromString("");
1808 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001810 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001811 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1812 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001813 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001814 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001815 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001816 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001817
Raymond Hettinger674f2412004-08-23 23:23:54 +00001818 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001819 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001820 * Do a pre-pass to figure out the total amount of space we'll
1821 * need (sz), see whether any argument is absurd, and defer to
1822 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001823 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001824 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001825 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001826 item = PySequence_Fast_GET_ITEM(seq, i);
1827 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001828#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001829 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001830 /* Defer to Unicode join.
1831 * CAUTION: There's no gurantee that the
1832 * original sequence can be iterated over
1833 * again, so we must pass seq here.
1834 */
1835 PyObject *result;
1836 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001837 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001838 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001839 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001840#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001841 PyErr_Format(PyExc_TypeError,
Neal Norwitz0e2cbab2006-04-17 05:56:32 +00001842 "sequence item %zd: expected string,"
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001843 " %.80s found",
Christian Heimese93237d2007-12-19 02:37:44 +00001844 i, Py_TYPE(item)->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001845 Py_DECREF(seq);
1846 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001847 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001848 sz += PyString_GET_SIZE(item);
1849 if (i != 0)
1850 sz += seplen;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00001851 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001852 PyErr_SetString(PyExc_OverflowError,
Georg Brandl90e27d32006-06-10 06:40:50 +00001853 "join() result is too long for a Python string");
Tim Peters19fe14e2001-01-19 03:03:47 +00001854 Py_DECREF(seq);
1855 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001857 }
1858
1859 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001860 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001861 if (res == NULL) {
1862 Py_DECREF(seq);
1863 return NULL;
1864 }
1865
1866 /* Catenate everything. */
1867 p = PyString_AS_STRING(res);
1868 for (i = 0; i < seqlen; ++i) {
1869 size_t n;
1870 item = PySequence_Fast_GET_ITEM(seq, i);
1871 n = PyString_GET_SIZE(item);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001872 Py_MEMCPY(p, PyString_AS_STRING(item), n);
Tim Peters19fe14e2001-01-19 03:03:47 +00001873 p += n;
1874 if (i < seqlen - 1) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001875 Py_MEMCPY(p, sep, seplen);
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001876 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001877 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001878 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001879
Jeremy Hylton49048292000-07-11 03:28:17 +00001880 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001881 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882}
1883
Tim Peters52e155e2001-06-16 05:42:57 +00001884PyObject *
1885_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001886{
Tim Petersa7259592001-06-16 05:11:17 +00001887 assert(sep != NULL && PyString_Check(sep));
1888 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001889 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001890}
1891
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001892Py_LOCAL_INLINE(void)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001893string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001894{
1895 if (*end > len)
1896 *end = len;
1897 else if (*end < 0)
1898 *end += len;
1899 if (*end < 0)
1900 *end = 0;
1901 if (*start < 0)
1902 *start += len;
1903 if (*start < 0)
1904 *start = 0;
1905}
1906
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00001907Py_LOCAL_INLINE(Py_ssize_t)
Fred Drakeba096332000-07-09 07:04:36 +00001908string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001910 PyObject *subobj;
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001911 const char *sub;
1912 Py_ssize_t sub_len;
1913 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Facundo Batista57d56692007-11-16 18:04:14 +00001914 PyObject *obj_start=Py_None, *obj_end=Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915
Facundo Batista57d56692007-11-16 18:04:14 +00001916 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1917 &obj_start, &obj_end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001918 return -2;
Facundo Batista57d56692007-11-16 18:04:14 +00001919 /* To support None in "start" and "end" arguments, meaning
1920 the same as if they were not passed.
1921 */
1922 if (obj_start != Py_None)
1923 if (!_PyEval_SliceIndex(obj_start, &start))
1924 return -2;
1925 if (obj_end != Py_None)
1926 if (!_PyEval_SliceIndex(obj_end, &end))
1927 return -2;
1928
Guido van Rossum4c08d552000-03-10 22:55:18 +00001929 if (PyString_Check(subobj)) {
1930 sub = PyString_AS_STRING(subobj);
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001931 sub_len = PyString_GET_SIZE(subobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001932 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001933#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001934 else if (PyUnicode_Check(subobj))
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001935 return PyUnicode_Find(
1936 (PyObject *)self, subobj, start, end, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001937#endif
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001938 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
Fredrik Lundh80f8e802006-05-28 12:06:46 +00001939 /* XXX - the "expected a character buffer object" is pretty
1940 confusing for a non-expert. remap to something else ? */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001941 return -2;
1942
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001943 if (dir > 0)
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001944 return stringlib_find_slice(
1945 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1946 sub, sub_len, start, end);
Fredrik Lundhe6e43c82006-05-26 19:48:07 +00001947 else
Fredrik Lundh0b7ef462006-05-27 15:26:19 +00001948 return stringlib_rfind_slice(
1949 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1950 sub, sub_len, start, end);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001951}
1952
1953
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001954PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955"S.find(sub [,start [,end]]) -> int\n\
1956\n\
1957Return the lowest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001958such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001959arguments start and end are interpreted as in slice notation.\n\
1960\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001961Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001962
1963static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001964string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001965{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001966 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001967 if (result == -2)
1968 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001969 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970}
1971
1972
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001973PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974"S.index(sub [,start [,end]]) -> int\n\
1975\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001976Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001977
1978static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001979string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001980{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001981 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982 if (result == -2)
1983 return NULL;
1984 if (result == -1) {
1985 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001986 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987 return NULL;
1988 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001989 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001990}
1991
1992
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001993PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001994"S.rfind(sub [,start [,end]]) -> int\n\
1995\n\
1996Return the highest index in S where substring sub is found,\n\
Georg Brandl9efd9b62007-07-29 17:38:35 +00001997such that sub is contained within s[start:end]. Optional\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001998arguments start and end are interpreted as in slice notation.\n\
1999\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002000Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002001
2002static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002003string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002005 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002006 if (result == -2)
2007 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002008 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009}
2010
2011
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002012PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013"S.rindex(sub [,start [,end]]) -> int\n\
2014\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002015Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016
2017static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002018string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002019{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002020 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021 if (result == -2)
2022 return NULL;
2023 if (result == -1) {
2024 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00002025 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002026 return NULL;
2027 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002028 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002029}
2030
2031
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002032Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002033do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
2034{
2035 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002036 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002037 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002038 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2039 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002040
2041 i = 0;
2042 if (striptype != RIGHTSTRIP) {
2043 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2044 i++;
2045 }
2046 }
2047
2048 j = len;
2049 if (striptype != LEFTSTRIP) {
2050 do {
2051 j--;
2052 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2053 j++;
2054 }
2055
2056 if (i == 0 && j == len && PyString_CheckExact(self)) {
2057 Py_INCREF(self);
2058 return (PyObject*)self;
2059 }
2060 else
2061 return PyString_FromStringAndSize(s+i, j-i);
2062}
2063
2064
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002065Py_LOCAL_INLINE(PyObject *)
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002066do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002067{
2068 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002069 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002071 i = 0;
2072 if (striptype != RIGHTSTRIP) {
2073 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2074 i++;
2075 }
2076 }
2077
2078 j = len;
2079 if (striptype != LEFTSTRIP) {
2080 do {
2081 j--;
2082 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2083 j++;
2084 }
2085
Tim Peters8fa5dd02001-09-12 02:18:30 +00002086 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002087 Py_INCREF(self);
2088 return (PyObject*)self;
2089 }
2090 else
2091 return PyString_FromStringAndSize(s+i, j-i);
2092}
2093
2094
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002095Py_LOCAL_INLINE(PyObject *)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002096do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2097{
2098 PyObject *sep = NULL;
2099
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002100 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002101 return NULL;
2102
2103 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002104 if (PyString_Check(sep))
2105 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00002106#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00002107 else if (PyUnicode_Check(sep)) {
2108 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2109 PyObject *res;
2110 if (uniself==NULL)
2111 return NULL;
2112 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2113 striptype, sep);
2114 Py_DECREF(uniself);
2115 return res;
2116 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00002117#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002118 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00002119#ifdef Py_USING_UNICODE
Neal Norwitz7e957d32006-04-06 08:17:41 +00002120 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002121#else
Neal Norwitz7e957d32006-04-06 08:17:41 +00002122 "%s arg must be None or str",
Walter Dörwald775c11f2002-05-13 09:00:41 +00002123#endif
Neal Norwitz7e957d32006-04-06 08:17:41 +00002124 STRIPNAME(striptype));
2125 return NULL;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002126 }
2127
2128 return do_strip(self, striptype);
2129}
2130
2131
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002132PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002133"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134\n\
2135Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002136whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002137If chars is given and not None, remove characters in chars instead.\n\
2138If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002139
2140static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002141string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002143 if (PyTuple_GET_SIZE(args) == 0)
2144 return do_strip(self, BOTHSTRIP); /* Common case */
2145 else
2146 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147}
2148
2149
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002150PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002151"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002153Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002154If chars is given and not None, remove characters in chars instead.\n\
2155If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002156
2157static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002158string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002160 if (PyTuple_GET_SIZE(args) == 0)
2161 return do_strip(self, LEFTSTRIP); /* Common case */
2162 else
2163 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164}
2165
2166
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002167PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002168"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002169\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002170Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002171If chars is given and not None, remove characters in chars instead.\n\
2172If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002173
2174static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002175string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002177 if (PyTuple_GET_SIZE(args) == 0)
2178 return do_strip(self, RIGHTSTRIP); /* Common case */
2179 else
2180 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002181}
2182
2183
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002184PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002185"S.lower() -> string\n\
2186\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002187Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002189/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2190#ifndef _tolower
2191#define _tolower tolower
2192#endif
2193
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002195string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002196{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002197 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002198 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002199 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002200
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002201 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002202 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002203 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002204
2205 s = PyString_AS_STRING(newobj);
2206
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002207 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002208
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002210 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002211 if (isupper(c))
2212 s[i] = _tolower(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002214
Anthony Baxtera6286212006-04-11 07:42:36 +00002215 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002216}
2217
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002218PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002219"S.upper() -> string\n\
2220\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002221Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002222
Fredrik Lundhdfe503d2006-05-25 16:10:12 +00002223#ifndef _toupper
2224#define _toupper toupper
2225#endif
2226
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002228string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002229{
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002230 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002231 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002232 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002233
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002234 newobj = PyString_FromStringAndSize(NULL, n);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002235 if (!newobj)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002236 return NULL;
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002237
2238 s = PyString_AS_STRING(newobj);
2239
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002240 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002241
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002242 for (i = 0; i < n; i++) {
Fredrik Lundh4b4e33e2006-05-25 15:49:45 +00002243 int c = Py_CHARMASK(s[i]);
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002244 if (islower(c))
2245 s[i] = _toupper(c);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002246 }
Fredrik Lundh39ccef62006-05-25 15:22:03 +00002247
Anthony Baxtera6286212006-04-11 07:42:36 +00002248 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002249}
2250
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002251PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002252"S.title() -> string\n\
2253\n\
2254Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002255characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256
2257static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002258string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259{
2260 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002261 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 int previous_is_cased = 0;
Anthony Baxtera6286212006-04-11 07:42:36 +00002263 PyObject *newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264
Anthony Baxtera6286212006-04-11 07:42:36 +00002265 newobj = PyString_FromStringAndSize(NULL, n);
2266 if (newobj == NULL)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002267 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002268 s_new = PyString_AsString(newobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269 for (i = 0; i < n; i++) {
2270 int c = Py_CHARMASK(*s++);
2271 if (islower(c)) {
2272 if (!previous_is_cased)
2273 c = toupper(c);
2274 previous_is_cased = 1;
2275 } else if (isupper(c)) {
2276 if (previous_is_cased)
2277 c = tolower(c);
2278 previous_is_cased = 1;
2279 } else
2280 previous_is_cased = 0;
2281 *s_new++ = c;
2282 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002283 return newobj;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002284}
2285
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002286PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002287"S.capitalize() -> string\n\
2288\n\
2289Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002290capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002291
2292static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002293string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002294{
2295 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002296 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002297 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002298
Anthony Baxtera6286212006-04-11 07:42:36 +00002299 newobj = PyString_FromStringAndSize(NULL, n);
2300 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002301 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002302 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002303 if (0 < n) {
2304 int c = Py_CHARMASK(*s++);
2305 if (islower(c))
2306 *s_new = toupper(c);
2307 else
2308 *s_new = c;
2309 s_new++;
2310 }
2311 for (i = 1; i < n; i++) {
2312 int c = Py_CHARMASK(*s++);
2313 if (isupper(c))
2314 *s_new = tolower(c);
2315 else
2316 *s_new = c;
2317 s_new++;
2318 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002319 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002320}
2321
2322
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002323PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324"S.count(sub[, start[, end]]) -> int\n\
2325\n\
Fredrik Lundh763b50f2006-05-22 15:35:12 +00002326Return the number of non-overlapping occurrences of substring sub in\n\
2327string S[start:end]. Optional arguments start and end are interpreted\n\
2328as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002329
2330static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002331string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002332{
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002333 PyObject *sub_obj;
2334 const char *str = PyString_AS_STRING(self), *sub;
2335 Py_ssize_t sub_len;
2336 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002337
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002338 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2339 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002341
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002342 if (PyString_Check(sub_obj)) {
2343 sub = PyString_AS_STRING(sub_obj);
2344 sub_len = PyString_GET_SIZE(sub_obj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002345 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002346#ifdef Py_USING_UNICODE
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002347 else if (PyUnicode_Check(sub_obj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002348 Py_ssize_t count;
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002349 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002350 if (count == -1)
2351 return NULL;
2352 else
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002353 return PyInt_FromSsize_t(count);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002354 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002355#endif
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002356 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002357 return NULL;
2358
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002359 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002360
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002361 return PyInt_FromSsize_t(
2362 stringlib_count(str + start, end - start, sub, sub_len)
2363 );
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364}
2365
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002366PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002367"S.swapcase() -> string\n\
2368\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002370converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002371
2372static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002373string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002374{
2375 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002376 Py_ssize_t i, n = PyString_GET_SIZE(self);
Anthony Baxtera6286212006-04-11 07:42:36 +00002377 PyObject *newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378
Anthony Baxtera6286212006-04-11 07:42:36 +00002379 newobj = PyString_FromStringAndSize(NULL, n);
2380 if (newobj == NULL)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002381 return NULL;
Anthony Baxtera6286212006-04-11 07:42:36 +00002382 s_new = PyString_AsString(newobj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002383 for (i = 0; i < n; i++) {
2384 int c = Py_CHARMASK(*s++);
2385 if (islower(c)) {
2386 *s_new = toupper(c);
2387 }
2388 else if (isupper(c)) {
2389 *s_new = tolower(c);
2390 }
2391 else
2392 *s_new = c;
2393 s_new++;
2394 }
Anthony Baxtera6286212006-04-11 07:42:36 +00002395 return newobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002396}
2397
2398
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002399PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002400"S.translate(table [,deletechars]) -> string\n\
2401\n\
2402Return a copy of the string S, where all characters occurring\n\
2403in the optional argument deletechars are removed, and the\n\
2404remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002405translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002406
2407static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002408string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002409{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002410 register char *input, *output;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002411 const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002412 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002413 PyObject *input_obj = (PyObject*)self;
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002414 const char *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002415 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002416 PyObject *result;
2417 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002418 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002419
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002420 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002421 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002422 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002423
2424 if (PyString_Check(tableobj)) {
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002425 table = PyString_AS_STRING(tableobj);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002426 tablen = PyString_GET_SIZE(tableobj);
2427 }
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002428 else if (tableobj == Py_None) {
2429 table = NULL;
2430 tablen = 256;
2431 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002432#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002434 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 parameter; instead a mapping to None will cause characters
2436 to be deleted. */
2437 if (delobj != NULL) {
2438 PyErr_SetString(PyExc_TypeError,
2439 "deletions are implemented differently for unicode");
2440 return NULL;
2441 }
2442 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2443 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002444#endif
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002445 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002446 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002447
Martin v. Löwis00b61272002-12-12 20:03:19 +00002448 if (tablen != 256) {
2449 PyErr_SetString(PyExc_ValueError,
2450 "translation table must be 256 characters long");
2451 return NULL;
2452 }
2453
Guido van Rossum4c08d552000-03-10 22:55:18 +00002454 if (delobj != NULL) {
2455 if (PyString_Check(delobj)) {
2456 del_table = PyString_AS_STRING(delobj);
2457 dellen = PyString_GET_SIZE(delobj);
2458 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002459#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002460 else if (PyUnicode_Check(delobj)) {
2461 PyErr_SetString(PyExc_TypeError,
2462 "deletions are implemented differently for unicode");
2463 return NULL;
2464 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002465#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002466 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2467 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002468 }
2469 else {
2470 del_table = NULL;
2471 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002472 }
2473
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002474 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002475 result = PyString_FromStringAndSize((char *)NULL, inlen);
2476 if (result == NULL)
2477 return NULL;
2478 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002479 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002481 if (dellen == 0 && table != NULL) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482 /* If no deletions are required, use faster code */
2483 for (i = inlen; --i >= 0; ) {
2484 c = Py_CHARMASK(*input++);
2485 if (Py_CHARMASK((*output++ = table[c])) != c)
2486 changed = 1;
2487 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002488 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002489 return result;
2490 Py_DECREF(result);
2491 Py_INCREF(input_obj);
2492 return input_obj;
2493 }
2494
Raymond Hettinger4db5fe92007-04-12 04:10:00 +00002495 if (table == NULL) {
2496 for (i = 0; i < 256; i++)
2497 trans_table[i] = Py_CHARMASK(i);
2498 } else {
2499 for (i = 0; i < 256; i++)
2500 trans_table[i] = Py_CHARMASK(table[i]);
2501 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002502
2503 for (i = 0; i < dellen; i++)
2504 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2505
2506 for (i = inlen; --i >= 0; ) {
2507 c = Py_CHARMASK(*input++);
2508 if (trans_table[c] != -1)
2509 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2510 continue;
2511 changed = 1;
2512 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002513 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002514 Py_DECREF(result);
2515 Py_INCREF(input_obj);
2516 return input_obj;
2517 }
2518 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002519 if (inlen > 0)
2520 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521 return result;
2522}
2523
2524
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002525#define FORWARD 1
2526#define REVERSE -1
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002527
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002528/* find and count characters and substrings */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002529
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002530#define findchar(target, target_len, c) \
2531 ((char *)memchr((const void *)(target), c, target_len))
2532
2533/* String ops must return a string. */
2534/* If the object is subclass of string, create a copy */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002535Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002536return_self(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002537{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002538 if (PyString_CheckExact(self)) {
2539 Py_INCREF(self);
2540 return self;
2541 }
2542 return (PyStringObject *)PyString_FromStringAndSize(
2543 PyString_AS_STRING(self),
2544 PyString_GET_SIZE(self));
2545}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002546
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002547Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002548countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002549{
2550 Py_ssize_t count=0;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002551 const char *start=target;
2552 const char *end=target+target_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002553
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002554 while ( (start=findchar(start, end-start, c)) != NULL ) {
2555 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002556 if (count >= maxcount)
2557 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002558 start += 1;
2559 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002560 return count;
2561}
2562
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002563Py_LOCAL(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002564findstring(const char *target, Py_ssize_t target_len,
2565 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002566 Py_ssize_t start,
2567 Py_ssize_t end,
2568 int direction)
2569{
2570 if (start < 0) {
2571 start += target_len;
2572 if (start < 0)
2573 start = 0;
2574 }
2575 if (end > target_len) {
2576 end = target_len;
2577 } else if (end < 0) {
2578 end += target_len;
2579 if (end < 0)
2580 end = 0;
2581 }
2582
2583 /* zero-length substrings always match at the first attempt */
2584 if (pattern_len == 0)
2585 return (direction > 0) ? start : end;
2586
2587 end -= pattern_len;
2588
2589 if (direction < 0) {
2590 for (; end >= start; end--)
2591 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2592 return end;
2593 } else {
2594 for (; start <= end; start++)
2595 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2596 return start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002597 }
2598 return -1;
2599}
2600
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00002601Py_LOCAL_INLINE(Py_ssize_t)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002602countstring(const char *target, Py_ssize_t target_len,
2603 const char *pattern, Py_ssize_t pattern_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002604 Py_ssize_t start,
2605 Py_ssize_t end,
Andrew Dalke51324072006-05-26 20:25:22 +00002606 int direction, Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002607{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002608 Py_ssize_t count=0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002609
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002610 if (start < 0) {
2611 start += target_len;
2612 if (start < 0)
2613 start = 0;
2614 }
2615 if (end > target_len) {
2616 end = target_len;
2617 } else if (end < 0) {
2618 end += target_len;
2619 if (end < 0)
2620 end = 0;
2621 }
2622
2623 /* zero-length substrings match everywhere */
Andrew Dalke51324072006-05-26 20:25:22 +00002624 if (pattern_len == 0 || maxcount == 0) {
2625 if (target_len+1 < maxcount)
2626 return target_len+1;
2627 return maxcount;
2628 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002629
2630 end -= pattern_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002631 if (direction < 0) {
Andrew Dalke51324072006-05-26 20:25:22 +00002632 for (; (end >= start); end--)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002633 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2634 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002635 if (--maxcount <= 0) break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002636 end -= pattern_len-1;
2637 }
2638 } else {
Andrew Dalke51324072006-05-26 20:25:22 +00002639 for (; (start <= end); start++)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002640 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2641 count++;
Andrew Dalke51324072006-05-26 20:25:22 +00002642 if (--maxcount <= 0)
2643 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002644 start += pattern_len-1;
2645 }
2646 }
2647 return count;
2648}
2649
2650
Fredrik Lundh2d23d5b2006-05-27 10:05:10 +00002651/* Algorithms for different cases of string replacement */
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002652
2653/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002654Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002655replace_interleave(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002656 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002657 Py_ssize_t maxcount)
2658{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002659 char *self_s, *result_s;
2660 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002661 Py_ssize_t count, i, product;
2662 PyStringObject *result;
2663
2664 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002665
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002666 /* 1 at the end plus 1 after every character */
2667 count = self_len+1;
2668 if (maxcount < count)
2669 count = maxcount;
Neal Norwitza7edb112006-07-30 06:59:13 +00002670
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002671 /* Check for overflow */
2672 /* result_len = count * to_len + self_len; */
2673 product = count * to_len;
2674 if (product / to_len != count) {
2675 PyErr_SetString(PyExc_OverflowError,
2676 "replace string is too long");
2677 return NULL;
2678 }
2679 result_len = product + self_len;
2680 if (result_len < 0) {
2681 PyErr_SetString(PyExc_OverflowError,
2682 "replace string is too long");
2683 return NULL;
2684 }
2685
2686 if (! (result = (PyStringObject *)
2687 PyString_FromStringAndSize(NULL, result_len)) )
2688 return NULL;
2689
2690 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002691 result_s = PyString_AS_STRING(result);
2692
2693 /* TODO: special case single character, which doesn't need memcpy */
2694
2695 /* Lay the first one down (guaranteed this will occur) */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002696 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002697 result_s += to_len;
2698 count -= 1;
2699
2700 for (i=0; i<count; i++) {
2701 *result_s++ = *self_s++;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002702 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002703 result_s += to_len;
2704 }
2705
2706 /* Copy the rest of the original string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002707 Py_MEMCPY(result_s, self_s, self_len-i);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002708
2709 return result;
2710}
2711
2712/* Special case for deleting a single character */
2713/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002714Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002715replace_delete_single_character(PyStringObject *self,
2716 char from_c, Py_ssize_t maxcount)
2717{
2718 char *self_s, *result_s;
2719 char *start, *next, *end;
2720 Py_ssize_t self_len, result_len;
2721 Py_ssize_t count;
2722 PyStringObject *result;
2723
2724 self_len = PyString_GET_SIZE(self);
2725 self_s = PyString_AS_STRING(self);
2726
Andrew Dalke51324072006-05-26 20:25:22 +00002727 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002728 if (count == 0) {
2729 return return_self(self);
2730 }
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002731
2732 result_len = self_len - count; /* from_len == 1 */
2733 assert(result_len>=0);
2734
2735 if ( (result = (PyStringObject *)
2736 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2737 return NULL;
2738 result_s = PyString_AS_STRING(result);
2739
2740 start = self_s;
2741 end = self_s + self_len;
2742 while (count-- > 0) {
2743 next = findchar(start, end-start, from_c);
2744 if (next == NULL)
2745 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002746 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002747 result_s += (next-start);
2748 start = next+1;
2749 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002750 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002751
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002752 return result;
2753}
2754
2755/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2756
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002757Py_LOCAL(PyStringObject *)
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002758replace_delete_substring(PyStringObject *self,
2759 const char *from_s, Py_ssize_t from_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002760 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002761 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002762 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002763 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002764 Py_ssize_t count, offset;
2765 PyStringObject *result;
2766
2767 self_len = PyString_GET_SIZE(self);
2768 self_s = PyString_AS_STRING(self);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002769
2770 count = countstring(self_s, self_len,
2771 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002772 0, self_len, 1,
2773 maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002774
2775 if (count == 0) {
2776 /* no matches */
2777 return return_self(self);
2778 }
2779
2780 result_len = self_len - (count * from_len);
2781 assert (result_len>=0);
Neal Norwitza7edb112006-07-30 06:59:13 +00002782
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002783 if ( (result = (PyStringObject *)
2784 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2785 return NULL;
Neal Norwitza7edb112006-07-30 06:59:13 +00002786
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002787 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002788
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002789 start = self_s;
2790 end = self_s + self_len;
2791 while (count-- > 0) {
2792 offset = findstring(start, end-start,
2793 from_s, from_len,
2794 0, end-start, FORWARD);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002795 if (offset == -1)
2796 break;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002797 next = start + offset;
Neal Norwitza7edb112006-07-30 06:59:13 +00002798
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002799 Py_MEMCPY(result_s, start, next-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002800
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002801 result_s += (next-start);
2802 start = next+from_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002803 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002804 Py_MEMCPY(result_s, start, end-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002805 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002806}
2807
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002808/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002809Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002810replace_single_character_in_place(PyStringObject *self,
2811 char from_c, char to_c,
2812 Py_ssize_t maxcount)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002813{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002814 char *self_s, *result_s, *start, *end, *next;
2815 Py_ssize_t self_len;
2816 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002817
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002818 /* The result string will be the same size */
2819 self_s = PyString_AS_STRING(self);
2820 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002821
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002822 next = findchar(self_s, self_len, from_c);
Neal Norwitza7edb112006-07-30 06:59:13 +00002823
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002824 if (next == NULL) {
2825 /* No matches; return the original string */
2826 return return_self(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002827 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002828
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002829 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002830 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002831 if (result == NULL)
2832 return NULL;
2833 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002834 Py_MEMCPY(result_s, self_s, self_len);
Neal Norwitza7edb112006-07-30 06:59:13 +00002835
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002836 /* change everything in-place, starting with this one */
2837 start = result_s + (next-self_s);
2838 *start = to_c;
2839 start++;
2840 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002841
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002842 while (--maxcount > 0) {
2843 next = findchar(start, end-start, from_c);
2844 if (next == NULL)
2845 break;
2846 *next = to_c;
2847 start = next+1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002848 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002849
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002850 return result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002851}
2852
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002853/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002854Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002855replace_substring_in_place(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002856 const char *from_s, Py_ssize_t from_len,
2857 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002858 Py_ssize_t maxcount)
2859{
2860 char *result_s, *start, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002861 char *self_s;
2862 Py_ssize_t self_len, offset;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002863 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002864
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002865 /* The result string will be the same size */
Neal Norwitza7edb112006-07-30 06:59:13 +00002866
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002867 self_s = PyString_AS_STRING(self);
2868 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002869
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002870 offset = findstring(self_s, self_len,
2871 from_s, from_len,
2872 0, self_len, FORWARD);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002873 if (offset == -1) {
2874 /* No matches; return the original string */
2875 return return_self(self);
2876 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002877
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002878 /* Need to make a new string */
Andrew Dalke8c909102006-05-25 17:53:00 +00002879 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002880 if (result == NULL)
2881 return NULL;
2882 result_s = PyString_AS_STRING(result);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002883 Py_MEMCPY(result_s, self_s, self_len);
Andrew Dalke8c909102006-05-25 17:53:00 +00002884
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002885 /* change everything in-place, starting with this one */
2886 start = result_s + offset;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002887 Py_MEMCPY(start, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002888 start += from_len;
2889 end = result_s + self_len;
Neal Norwitza7edb112006-07-30 06:59:13 +00002890
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002891 while ( --maxcount > 0) {
2892 offset = findstring(start, end-start,
2893 from_s, from_len,
2894 0, end-start, FORWARD);
2895 if (offset==-1)
2896 break;
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002897 Py_MEMCPY(start+offset, to_s, from_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002898 start += offset+from_len;
2899 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002900
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002901 return result;
2902}
2903
2904/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002905Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002906replace_single_character(PyStringObject *self,
2907 char from_c,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002908 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002909 Py_ssize_t maxcount)
2910{
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002911 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002912 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002913 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002914 Py_ssize_t count, product;
2915 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002916
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002917 self_s = PyString_AS_STRING(self);
2918 self_len = PyString_GET_SIZE(self);
Neal Norwitza7edb112006-07-30 06:59:13 +00002919
Andrew Dalke51324072006-05-26 20:25:22 +00002920 count = countchar(self_s, self_len, from_c, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002921 if (count == 0) {
2922 /* no matches, return unchanged */
2923 return return_self(self);
2924 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002925
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002926 /* use the difference between current and new, hence the "-1" */
2927 /* result_len = self_len + count * (to_len-1) */
2928 product = count * (to_len-1);
2929 if (product / (to_len-1) != count) {
2930 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2931 return NULL;
2932 }
2933 result_len = self_len + product;
2934 if (result_len < 0) {
2935 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2936 return NULL;
2937 }
Neal Norwitza7edb112006-07-30 06:59:13 +00002938
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002939 if ( (result = (PyStringObject *)
2940 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2941 return NULL;
2942 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00002943
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002944 start = self_s;
2945 end = self_s + self_len;
2946 while (count-- > 0) {
2947 next = findchar(start, end-start, from_c);
2948 if (next == NULL)
2949 break;
Neal Norwitza7edb112006-07-30 06:59:13 +00002950
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002951 if (next == start) {
2952 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002953 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002954 result_s += to_len;
2955 start += 1;
2956 } else {
2957 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002958 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002959 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002960 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002961 result_s += to_len;
2962 start = next+1;
2963 }
2964 }
2965 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00002966 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00002967
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002968 return result;
2969}
2970
2971/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Fredrik Lundh7c940d12006-05-26 16:32:42 +00002972Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002973replace_substring(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002974 const char *from_s, Py_ssize_t from_len,
2975 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002976 Py_ssize_t maxcount) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002977 char *self_s, *result_s;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002978 char *start, *next, *end;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002979 Py_ssize_t self_len, result_len;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002980 Py_ssize_t count, offset, product;
2981 PyStringObject *result;
Neal Norwitza7edb112006-07-30 06:59:13 +00002982
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002983 self_s = PyString_AS_STRING(self);
2984 self_len = PyString_GET_SIZE(self);
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002985
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002986 count = countstring(self_s, self_len,
2987 from_s, from_len,
Andrew Dalke51324072006-05-26 20:25:22 +00002988 0, self_len, FORWARD, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002989 if (count == 0) {
2990 /* no matches, return unchanged */
2991 return return_self(self);
2992 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00002993
Fredrik Lundhe68955c2006-05-25 17:08:14 +00002994 /* Check for overflow */
2995 /* result_len = self_len + count * (to_len-from_len) */
2996 product = count * (to_len-from_len);
2997 if (product / (to_len-from_len) != count) {
2998 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2999 return NULL;
3000 }
3001 result_len = self_len + product;
3002 if (result_len < 0) {
3003 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3004 return NULL;
3005 }
Neal Norwitza7edb112006-07-30 06:59:13 +00003006
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003007 if ( (result = (PyStringObject *)
3008 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3009 return NULL;
3010 result_s = PyString_AS_STRING(result);
Neal Norwitza7edb112006-07-30 06:59:13 +00003011
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003012 start = self_s;
3013 end = self_s + self_len;
3014 while (count-- > 0) {
3015 offset = findstring(start, end-start,
3016 from_s, from_len,
3017 0, end-start, FORWARD);
3018 if (offset == -1)
3019 break;
3020 next = start+offset;
3021 if (next == start) {
3022 /* replace with the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003023 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003024 result_s += to_len;
3025 start += from_len;
3026 } else {
3027 /* copy the unchanged old then the 'to' */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003028 Py_MEMCPY(result_s, start, next-start);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003029 result_s += (next-start);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003030 Py_MEMCPY(result_s, to_s, to_len);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003031 result_s += to_len;
3032 start = next+from_len;
3033 }
3034 }
3035 /* Copy the remainder of the remaining string */
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003036 Py_MEMCPY(result_s, start, end-start);
Neal Norwitza7edb112006-07-30 06:59:13 +00003037
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003038 return result;
3039}
3040
3041
Fredrik Lundh7c940d12006-05-26 16:32:42 +00003042Py_LOCAL(PyStringObject *)
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003043replace(PyStringObject *self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003044 const char *from_s, Py_ssize_t from_len,
3045 const char *to_s, Py_ssize_t to_len,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003046 Py_ssize_t maxcount)
3047{
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003048 if (maxcount < 0) {
3049 maxcount = PY_SSIZE_T_MAX;
3050 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3051 /* nothing to do; return the original string */
3052 return return_self(self);
3053 }
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003054
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003055 if (maxcount == 0 ||
3056 (from_len == 0 && to_len == 0)) {
3057 /* nothing to do; return the original string */
3058 return return_self(self);
3059 }
3060
3061 /* Handle zero-length special cases */
Neal Norwitza7edb112006-07-30 06:59:13 +00003062
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003063 if (from_len == 0) {
3064 /* insert the 'to' string everywhere. */
3065 /* >>> "Python".replace("", ".") */
3066 /* '.P.y.t.h.o.n.' */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003067 return replace_interleave(self, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003068 }
3069
3070 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3071 /* point for an empty self string to generate a non-empty string */
3072 /* Special case so the remaining code always gets a non-empty string */
3073 if (PyString_GET_SIZE(self) == 0) {
3074 return return_self(self);
3075 }
3076
3077 if (to_len == 0) {
3078 /* delete all occurances of 'from' string */
3079 if (from_len == 1) {
3080 return replace_delete_single_character(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003081 self, from_s[0], maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003082 } else {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003083 return replace_delete_substring(self, from_s, from_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003084 }
3085 }
3086
3087 /* Handle special case where both strings have the same length */
3088
3089 if (from_len == to_len) {
3090 if (from_len == 1) {
3091 return replace_single_character_in_place(
3092 self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003093 from_s[0],
3094 to_s[0],
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003095 maxcount);
3096 } else {
3097 return replace_substring_in_place(
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003098 self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003099 }
3100 }
3101
3102 /* Otherwise use the more generic algorithms */
3103 if (from_len == 1) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003104 return replace_single_character(self, from_s[0],
3105 to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003106 } else {
3107 /* len('from')>=2, len('to')>=1 */
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003108 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003109 }
3110}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003111
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003112PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00003113"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003114\n\
3115Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00003116old replaced by new. If the optional argument count is\n\
3117given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003118
3119static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003120string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003121{
Thomas Woutersdc5f8082006-04-19 15:38:01 +00003122 Py_ssize_t count = -1;
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003123 PyObject *from, *to;
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003124 const char *from_s, *to_s;
3125 Py_ssize_t from_len, to_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003126
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003127 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003128 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003129
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003130 if (PyString_Check(from)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003131 from_s = PyString_AS_STRING(from);
3132 from_len = PyString_GET_SIZE(from);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003133 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003134#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003135 if (PyUnicode_Check(from))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003136 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003137 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003138#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003139 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140 return NULL;
3141
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003142 if (PyString_Check(to)) {
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003143 to_s = PyString_AS_STRING(to);
3144 to_len = PyString_GET_SIZE(to);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003145 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003146#ifdef Py_USING_UNICODE
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003147 else if (PyUnicode_Check(to))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003148 return PyUnicode_Replace((PyObject *)self,
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003149 from, to, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003150#endif
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003151 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003152 return NULL;
3153
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003154 return (PyObject *)replace((PyStringObject *) self,
Neal Norwitzf71ec5a2006-07-30 06:57:04 +00003155 from_s, from_len,
3156 to_s, to_len, count);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003157}
3158
Fredrik Lundhe68955c2006-05-25 17:08:14 +00003159/** End DALKE **/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003160
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003161/* Matches the end (direction >= 0) or start (direction < 0) of self
Georg Brandl24250812006-06-09 18:45:48 +00003162 * against substr, using the start and end arguments. Returns
3163 * -1 on error, 0 if not found and 1 if found.
3164 */
3165Py_LOCAL(int)
3166_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3167 Py_ssize_t end, int direction)
3168{
3169 Py_ssize_t len = PyString_GET_SIZE(self);
3170 Py_ssize_t slen;
3171 const char* sub;
3172 const char* str;
3173
3174 if (PyString_Check(substr)) {
3175 sub = PyString_AS_STRING(substr);
3176 slen = PyString_GET_SIZE(substr);
3177 }
3178#ifdef Py_USING_UNICODE
3179 else if (PyUnicode_Check(substr))
3180 return PyUnicode_Tailmatch((PyObject *)self,
3181 substr, start, end, direction);
3182#endif
3183 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3184 return -1;
3185 str = PyString_AS_STRING(self);
3186
3187 string_adjust_indices(&start, &end, len);
3188
3189 if (direction < 0) {
3190 /* startswith */
3191 if (start+slen > len)
3192 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003193 } else {
3194 /* endswith */
3195 if (end-start < slen || start > len)
3196 return 0;
3197
3198 if (end-slen > start)
3199 start = end - slen;
Georg Brandl24250812006-06-09 18:45:48 +00003200 }
Neal Norwitz8e6675a2006-06-11 05:47:14 +00003201 if (end-start >= slen)
3202 return ! memcmp(str+start, sub, slen);
3203 return 0;
Georg Brandl24250812006-06-09 18:45:48 +00003204}
3205
3206
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003207PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003208"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003209\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003210Return True if S starts with the specified prefix, False otherwise.\n\
3211With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003212With optional end, stop comparing S at that position.\n\
3213prefix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003214
3215static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003216string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003217{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003218 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003219 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003220 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003221 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003222
Guido van Rossumc6821402000-05-08 14:08:05 +00003223 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3224 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003225 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003226 if (PyTuple_Check(subobj)) {
3227 Py_ssize_t i;
3228 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3229 result = _string_tailmatch(self,
3230 PyTuple_GET_ITEM(subobj, i),
3231 start, end, -1);
3232 if (result == -1)
3233 return NULL;
3234 else if (result) {
3235 Py_RETURN_TRUE;
3236 }
3237 }
3238 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003239 }
Georg Brandl24250812006-06-09 18:45:48 +00003240 result = _string_tailmatch(self, subobj, start, end, -1);
3241 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003242 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003243 else
Georg Brandl24250812006-06-09 18:45:48 +00003244 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003245}
3246
3247
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003248PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003249"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003250\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00003251Return True if S ends with the specified suffix, False otherwise.\n\
3252With optional start, test S beginning at that position.\n\
Georg Brandl24250812006-06-09 18:45:48 +00003253With optional end, stop comparing S at that position.\n\
3254suffix can also be a tuple of strings to try.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003255
3256static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003257string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003258{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003259 Py_ssize_t start = 0;
Martin v. Löwis8ce358f2006-04-13 07:22:51 +00003260 Py_ssize_t end = PY_SSIZE_T_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003261 PyObject *subobj;
Georg Brandl24250812006-06-09 18:45:48 +00003262 int result;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003263
Guido van Rossumc6821402000-05-08 14:08:05 +00003264 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3265 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003266 return NULL;
Georg Brandl24250812006-06-09 18:45:48 +00003267 if (PyTuple_Check(subobj)) {
3268 Py_ssize_t i;
3269 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3270 result = _string_tailmatch(self,
3271 PyTuple_GET_ITEM(subobj, i),
3272 start, end, +1);
3273 if (result == -1)
3274 return NULL;
3275 else if (result) {
3276 Py_RETURN_TRUE;
3277 }
3278 }
3279 Py_RETURN_FALSE;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003280 }
Georg Brandl24250812006-06-09 18:45:48 +00003281 result = _string_tailmatch(self, subobj, start, end, +1);
3282 if (result == -1)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003283 return NULL;
Neal Norwitz1f68fc72002-06-14 00:50:42 +00003284 else
Georg Brandl24250812006-06-09 18:45:48 +00003285 return PyBool_FromLong(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003286}
3287
3288
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003289PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003290"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003291\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003292Encodes S using the codec registered for encoding. encoding defaults\n\
3293to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003294handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003295a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3296'xmlcharrefreplace' as well as any other name registered with\n\
3297codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003298
3299static PyObject *
3300string_encode(PyStringObject *self, PyObject *args)
3301{
3302 char *encoding = NULL;
3303 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003304 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003305
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003306 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3307 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003308 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003309 if (v == NULL)
3310 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003311 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3312 PyErr_Format(PyExc_TypeError,
3313 "encoder did not return a string/unicode object "
3314 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003315 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003316 Py_DECREF(v);
3317 return NULL;
3318 }
3319 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003320
3321 onError:
3322 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003323}
3324
3325
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003326PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003327"S.decode([encoding[,errors]]) -> object\n\
3328\n\
3329Decodes S using the codec registered for encoding. encoding defaults\n\
3330to the default encoding. errors may be given to set a different error\n\
3331handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00003332a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3333as well as any other name registerd with codecs.register_error that is\n\
3334able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003335
3336static PyObject *
3337string_decode(PyStringObject *self, PyObject *args)
3338{
3339 char *encoding = NULL;
3340 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003341 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00003342
Marc-André Lemburg2d920412001-05-15 12:00:02 +00003343 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3344 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003345 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003346 if (v == NULL)
3347 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003348 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3349 PyErr_Format(PyExc_TypeError,
3350 "decoder did not return a string/unicode object "
3351 "(type=%.400s)",
Christian Heimese93237d2007-12-19 02:37:44 +00003352 Py_TYPE(v)->tp_name);
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00003353 Py_DECREF(v);
3354 return NULL;
3355 }
3356 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00003357
3358 onError:
3359 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00003360}
3361
3362
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003363PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003364"S.expandtabs([tabsize]) -> string\n\
3365\n\
3366Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003367If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003368
3369static PyObject*
3370string_expandtabs(PyStringObject *self, PyObject *args)
3371{
3372 const char *e, *p;
3373 char *q;
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003374 Py_ssize_t i, j, old_j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003375 PyObject *u;
3376 int tabsize = 8;
3377
3378 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3379 return NULL;
3380
Thomas Wouters7e474022000-07-16 12:04:32 +00003381 /* First pass: determine size of output string */
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003382 i = j = old_j = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003383 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3384 for (p = PyString_AS_STRING(self); p < e; p++)
3385 if (*p == '\t') {
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003386 if (tabsize > 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003387 j += tabsize - (j % tabsize);
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003388 if (old_j > j) {
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003389 PyErr_SetString(PyExc_OverflowError,
3390 "new string is too long");
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003391 return NULL;
3392 }
3393 old_j = j;
3394 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003395 }
3396 else {
3397 j++;
3398 if (*p == '\n' || *p == '\r') {
3399 i += j;
Neal Norwitz5c9a81a2007-06-11 02:16:10 +00003400 old_j = j = 0;
3401 if (i < 0) {
3402 PyErr_SetString(PyExc_OverflowError,
3403 "new string is too long");
3404 return NULL;
3405 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00003406 }
3407 }
3408
Neal Norwitz7dbd2a32007-06-09 03:36:34 +00003409 if ((i + j) < 0) {
3410 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3411 return NULL;
3412 }
3413
Guido van Rossum4c08d552000-03-10 22:55:18 +00003414 /* Second pass: create output string and fill it */
3415 u = PyString_FromStringAndSize(NULL, i + j);
3416 if (!u)
3417 return NULL;
3418
3419 j = 0;
3420 q = PyString_AS_STRING(u);
3421
3422 for (p = PyString_AS_STRING(self); p < e; p++)
3423 if (*p == '\t') {
3424 if (tabsize > 0) {
3425 i = tabsize - (j % tabsize);
3426 j += i;
3427 while (i--)
3428 *q++ = ' ';
3429 }
3430 }
3431 else {
3432 j++;
3433 *q++ = *p;
3434 if (*p == '\n' || *p == '\r')
3435 j = 0;
3436 }
3437
3438 return u;
3439}
3440
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00003441Py_LOCAL_INLINE(PyObject *)
Martin v. Löwis18e16552006-02-15 17:27:45 +00003442pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003443{
3444 PyObject *u;
3445
3446 if (left < 0)
3447 left = 0;
3448 if (right < 0)
3449 right = 0;
3450
Tim Peters8fa5dd02001-09-12 02:18:30 +00003451 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003452 Py_INCREF(self);
3453 return (PyObject *)self;
3454 }
3455
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003456 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00003457 left + PyString_GET_SIZE(self) + right);
3458 if (u) {
3459 if (left)
3460 memset(PyString_AS_STRING(u), fill, left);
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003461 Py_MEMCPY(PyString_AS_STRING(u) + left,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003462 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00003463 PyString_GET_SIZE(self));
3464 if (right)
3465 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3466 fill, right);
3467 }
3468
3469 return u;
3470}
3471
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003472PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003473"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003474"\n"
3475"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003476"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003477
3478static PyObject *
3479string_ljust(PyStringObject *self, PyObject *args)
3480{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003481 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003482 char fillchar = ' ';
3483
Thomas Wouters4abb3662006-04-19 14:50:15 +00003484 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003485 return NULL;
3486
Tim Peters8fa5dd02001-09-12 02:18:30 +00003487 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003488 Py_INCREF(self);
3489 return (PyObject*) self;
3490 }
3491
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003492 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003493}
3494
3495
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003496PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003497"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003498"\n"
3499"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003500"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003501
3502static PyObject *
3503string_rjust(PyStringObject *self, PyObject *args)
3504{
Thomas Wouters4abb3662006-04-19 14:50:15 +00003505 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003506 char fillchar = ' ';
3507
Thomas Wouters4abb3662006-04-19 14:50:15 +00003508 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003509 return NULL;
3510
Tim Peters8fa5dd02001-09-12 02:18:30 +00003511 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003512 Py_INCREF(self);
3513 return (PyObject*) self;
3514 }
3515
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003516 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003517}
3518
3519
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003520PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003521"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00003522"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003523"Return S centered in a string of length width. Padding is\n"
3524"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003525
3526static PyObject *
3527string_center(PyStringObject *self, PyObject *args)
3528{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003529 Py_ssize_t marg, left;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003530 Py_ssize_t width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003531 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00003532
Thomas Wouters4abb3662006-04-19 14:50:15 +00003533 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003534 return NULL;
3535
Tim Peters8fa5dd02001-09-12 02:18:30 +00003536 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003537 Py_INCREF(self);
3538 return (PyObject*) self;
3539 }
3540
3541 marg = width - PyString_GET_SIZE(self);
3542 left = marg / 2 + (marg & width & 1);
3543
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00003544 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003545}
3546
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003547PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00003548"S.zfill(width) -> string\n"
3549"\n"
3550"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003551"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00003552
3553static PyObject *
3554string_zfill(PyStringObject *self, PyObject *args)
3555{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003556 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00003557 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003558 char *p;
Thomas Wouters4abb3662006-04-19 14:50:15 +00003559 Py_ssize_t width;
Walter Dörwald068325e2002-04-15 13:36:47 +00003560
Thomas Wouters4abb3662006-04-19 14:50:15 +00003561 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00003562 return NULL;
3563
3564 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00003565 if (PyString_CheckExact(self)) {
3566 Py_INCREF(self);
3567 return (PyObject*) self;
3568 }
3569 else
3570 return PyString_FromStringAndSize(
3571 PyString_AS_STRING(self),
3572 PyString_GET_SIZE(self)
3573 );
Walter Dörwald068325e2002-04-15 13:36:47 +00003574 }
3575
3576 fill = width - PyString_GET_SIZE(self);
3577
3578 s = pad(self, fill, 0, '0');
3579
3580 if (s == NULL)
3581 return NULL;
3582
3583 p = PyString_AS_STRING(s);
3584 if (p[fill] == '+' || p[fill] == '-') {
3585 /* move sign to beginning of string */
3586 p[0] = p[fill];
3587 p[fill] = '0';
3588 }
3589
3590 return (PyObject*) s;
3591}
3592
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003593PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00003594"S.isspace() -> bool\n\
3595\n\
3596Return True if all characters in S are whitespace\n\
3597and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003598
3599static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003600string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003601{
Fred Drakeba096332000-07-09 07:04:36 +00003602 register const unsigned char *p
3603 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003604 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003605
Guido van Rossum4c08d552000-03-10 22:55:18 +00003606 /* Shortcut for single character strings */
3607 if (PyString_GET_SIZE(self) == 1 &&
3608 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003609 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003610
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003611 /* Special case for empty strings */
3612 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003613 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003614
Guido van Rossum4c08d552000-03-10 22:55:18 +00003615 e = p + PyString_GET_SIZE(self);
3616 for (; p < e; p++) {
3617 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003618 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003619 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003620 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003621}
3622
3623
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003624PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003625"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003626\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003627Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003628and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003629
3630static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003631string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003632{
Fred Drakeba096332000-07-09 07:04:36 +00003633 register const unsigned char *p
3634 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003635 register const unsigned char *e;
3636
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003637 /* Shortcut for single character strings */
3638 if (PyString_GET_SIZE(self) == 1 &&
3639 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003640 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003641
3642 /* Special case for empty strings */
3643 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003644 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003645
3646 e = p + PyString_GET_SIZE(self);
3647 for (; p < e; p++) {
3648 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003649 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003650 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003651 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003652}
3653
3654
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003655PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003656"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003657\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003658Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003659and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003660
3661static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003662string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003663{
Fred Drakeba096332000-07-09 07:04:36 +00003664 register const unsigned char *p
3665 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003666 register const unsigned char *e;
3667
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003668 /* Shortcut for single character strings */
3669 if (PyString_GET_SIZE(self) == 1 &&
3670 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003671 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003672
3673 /* Special case for empty strings */
3674 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003675 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003676
3677 e = p + PyString_GET_SIZE(self);
3678 for (; p < e; p++) {
3679 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003680 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003681 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003682 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003683}
3684
3685
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003686PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003687"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003688\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003689Return True if all characters in S are digits\n\
3690and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003691
3692static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003693string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003694{
Fred Drakeba096332000-07-09 07:04:36 +00003695 register const unsigned char *p
3696 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003697 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003698
Guido van Rossum4c08d552000-03-10 22:55:18 +00003699 /* Shortcut for single character strings */
3700 if (PyString_GET_SIZE(self) == 1 &&
3701 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003702 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003703
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003704 /* Special case for empty strings */
3705 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003706 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003707
Guido van Rossum4c08d552000-03-10 22:55:18 +00003708 e = p + PyString_GET_SIZE(self);
3709 for (; p < e; p++) {
3710 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003711 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003712 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003713 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003714}
3715
3716
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003717PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003718"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003719\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003720Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003721at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003722
3723static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003724string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003725{
Fred Drakeba096332000-07-09 07:04:36 +00003726 register const unsigned char *p
3727 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003728 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003729 int cased;
3730
Guido van Rossum4c08d552000-03-10 22:55:18 +00003731 /* Shortcut for single character strings */
3732 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003733 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003734
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003735 /* Special case for empty strings */
3736 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003737 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003738
Guido van Rossum4c08d552000-03-10 22:55:18 +00003739 e = p + PyString_GET_SIZE(self);
3740 cased = 0;
3741 for (; p < e; p++) {
3742 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003743 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003744 else if (!cased && islower(*p))
3745 cased = 1;
3746 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003747 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003748}
3749
3750
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003751PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003752"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003753\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003754Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003755at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003756
3757static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003758string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003759{
Fred Drakeba096332000-07-09 07:04:36 +00003760 register const unsigned char *p
3761 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003762 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003763 int cased;
3764
Guido van Rossum4c08d552000-03-10 22:55:18 +00003765 /* Shortcut for single character strings */
3766 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003767 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003768
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003769 /* Special case for empty strings */
3770 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003771 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003772
Guido van Rossum4c08d552000-03-10 22:55:18 +00003773 e = p + PyString_GET_SIZE(self);
3774 cased = 0;
3775 for (; p < e; p++) {
3776 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003777 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003778 else if (!cased && isupper(*p))
3779 cased = 1;
3780 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003781 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003782}
3783
3784
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003785PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003786"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003787\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003788Return True if S is a titlecased string and there is at least one\n\
3789character in S, i.e. uppercase characters may only follow uncased\n\
3790characters and lowercase characters only cased ones. Return False\n\
3791otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003792
3793static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003794string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003795{
Fred Drakeba096332000-07-09 07:04:36 +00003796 register const unsigned char *p
3797 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003798 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003799 int cased, previous_is_cased;
3800
Guido van Rossum4c08d552000-03-10 22:55:18 +00003801 /* Shortcut for single character strings */
3802 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003803 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003804
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003805 /* Special case for empty strings */
3806 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003807 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003808
Guido van Rossum4c08d552000-03-10 22:55:18 +00003809 e = p + PyString_GET_SIZE(self);
3810 cased = 0;
3811 previous_is_cased = 0;
3812 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003813 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003814
3815 if (isupper(ch)) {
3816 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003817 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003818 previous_is_cased = 1;
3819 cased = 1;
3820 }
3821 else if (islower(ch)) {
3822 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003823 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003824 previous_is_cased = 1;
3825 cased = 1;
3826 }
3827 else
3828 previous_is_cased = 0;
3829 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003830 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003831}
3832
3833
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003834PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003835"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003836\n\
3837Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003838Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003839is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003840
Guido van Rossum4c08d552000-03-10 22:55:18 +00003841static PyObject*
3842string_splitlines(PyStringObject *self, PyObject *args)
3843{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003844 register Py_ssize_t i;
3845 register Py_ssize_t j;
3846 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003847 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003848 PyObject *list;
3849 PyObject *str;
3850 char *data;
3851
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003852 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003853 return NULL;
3854
3855 data = PyString_AS_STRING(self);
3856 len = PyString_GET_SIZE(self);
3857
Andrew Dalke7e0a62e2006-05-26 22:49:03 +00003858 /* This does not use the preallocated list because splitlines is
3859 usually run with hundreds of newlines. The overhead of
3860 switching between PyList_SET_ITEM and append causes about a
3861 2-3% slowdown for that common case. A smarter implementation
3862 could move the if check out, so the SET_ITEMs are done first
3863 and the appends only done when the prealloc buffer is full.
3864 That's too much work for little gain.*/
3865
Guido van Rossum4c08d552000-03-10 22:55:18 +00003866 list = PyList_New(0);
3867 if (!list)
3868 goto onError;
3869
3870 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003871 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003872
Guido van Rossum4c08d552000-03-10 22:55:18 +00003873 /* Find a line and append it */
3874 while (i < len && data[i] != '\n' && data[i] != '\r')
3875 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003876
3877 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003878 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003879 if (i < len) {
3880 if (data[i] == '\r' && i + 1 < len &&
3881 data[i+1] == '\n')
3882 i += 2;
3883 else
3884 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003885 if (keepends)
3886 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003887 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003888 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003889 j = i;
3890 }
3891 if (j < len) {
3892 SPLIT_APPEND(data, j, len);
3893 }
3894
3895 return list;
3896
3897 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003898 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003899 return NULL;
3900}
3901
3902#undef SPLIT_APPEND
Andrew Dalke525eab32006-05-26 14:00:45 +00003903#undef SPLIT_ADD
3904#undef MAX_PREALLOC
3905#undef PREALLOC_SIZE
Guido van Rossum4c08d552000-03-10 22:55:18 +00003906
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003907static PyObject *
3908string_getnewargs(PyStringObject *v)
3909{
Christian Heimese93237d2007-12-19 02:37:44 +00003910 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003911}
3912
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003913
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003914static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003915string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003916 /* Counterparts of the obsolete stropmodule functions; except
3917 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003918 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3919 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003920 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003921 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3922 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003923 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3924 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3925 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3926 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3927 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3928 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3929 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003930 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3931 capitalize__doc__},
3932 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3933 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3934 endswith__doc__},
Fredrik Lundh450277f2006-05-26 09:46:59 +00003935 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003936 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3937 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3938 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3939 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3940 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3941 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3942 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
Fredrik Lundhb3167cb2006-05-26 18:15:38 +00003943 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3944 rpartition__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003945 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3946 startswith__doc__},
3947 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3948 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3949 swapcase__doc__},
3950 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3951 translate__doc__},
3952 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3953 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3954 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3955 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3956 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3957 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3958 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3959 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3960 expandtabs__doc__},
3961 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3962 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003963 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003964 {NULL, NULL} /* sentinel */
3965};
3966
Jeremy Hylton938ace62002-07-17 16:30:39 +00003967static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003968str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3969
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003970static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003971string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003972{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003973 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003974 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003975
Guido van Rossumae960af2001-08-30 03:11:59 +00003976 if (type != &PyString_Type)
3977 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003978 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3979 return NULL;
3980 if (x == NULL)
3981 return PyString_FromString("");
3982 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003983}
3984
Guido van Rossumae960af2001-08-30 03:11:59 +00003985static PyObject *
3986str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3987{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003988 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003989 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003990
3991 assert(PyType_IsSubtype(type, &PyString_Type));
3992 tmp = string_new(&PyString_Type, args, kwds);
3993 if (tmp == NULL)
3994 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003995 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003996 n = PyString_GET_SIZE(tmp);
3997 pnew = type->tp_alloc(type, n);
3998 if (pnew != NULL) {
Fredrik Lundh80f8e802006-05-28 12:06:46 +00003999 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004000 ((PyStringObject *)pnew)->ob_shash =
4001 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004002 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00004003 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00004004 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00004005 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00004006}
4007
Guido van Rossumcacfc072002-05-24 19:01:59 +00004008static PyObject *
4009basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4010{
4011 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004012 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004013 return NULL;
4014}
4015
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004016static PyObject *
4017string_mod(PyObject *v, PyObject *w)
4018{
4019 if (!PyString_Check(v)) {
4020 Py_INCREF(Py_NotImplemented);
4021 return Py_NotImplemented;
4022 }
4023 return PyString_Format(v, w);
4024}
4025
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004026PyDoc_STRVAR(basestring_doc,
4027"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00004028
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004029static PyNumberMethods string_as_number = {
4030 0, /*nb_add*/
4031 0, /*nb_subtract*/
4032 0, /*nb_multiply*/
4033 0, /*nb_divide*/
4034 string_mod, /*nb_remainder*/
4035};
4036
4037
Guido van Rossumcacfc072002-05-24 19:01:59 +00004038PyTypeObject PyBaseString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004039 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00004040 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00004041 0,
4042 0,
4043 0, /* tp_dealloc */
4044 0, /* tp_print */
4045 0, /* tp_getattr */
4046 0, /* tp_setattr */
4047 0, /* tp_compare */
4048 0, /* tp_repr */
4049 0, /* tp_as_number */
4050 0, /* tp_as_sequence */
4051 0, /* tp_as_mapping */
4052 0, /* tp_hash */
4053 0, /* tp_call */
4054 0, /* tp_str */
4055 0, /* tp_getattro */
4056 0, /* tp_setattro */
4057 0, /* tp_as_buffer */
4058 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4059 basestring_doc, /* tp_doc */
4060 0, /* tp_traverse */
4061 0, /* tp_clear */
4062 0, /* tp_richcompare */
4063 0, /* tp_weaklistoffset */
4064 0, /* tp_iter */
4065 0, /* tp_iternext */
4066 0, /* tp_methods */
4067 0, /* tp_members */
4068 0, /* tp_getset */
4069 &PyBaseObject_Type, /* tp_base */
4070 0, /* tp_dict */
4071 0, /* tp_descr_get */
4072 0, /* tp_descr_set */
4073 0, /* tp_dictoffset */
4074 0, /* tp_init */
4075 0, /* tp_alloc */
4076 basestring_new, /* tp_new */
4077 0, /* tp_free */
4078};
4079
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004080PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00004081"str(object) -> string\n\
4082\n\
4083Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00004084If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00004085
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004086PyTypeObject PyString_Type = {
Martin v. Löwis68192102007-07-21 06:55:02 +00004087 PyVarObject_HEAD_INIT(&PyType_Type, 0)
Tim Peters6d6c1a32001-08-02 04:15:00 +00004088 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004089 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004090 sizeof(char),
Georg Brandl347b3002006-03-30 11:57:00 +00004091 string_dealloc, /* tp_dealloc */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004092 (printfunc)string_print, /* tp_print */
4093 0, /* tp_getattr */
4094 0, /* tp_setattr */
4095 0, /* tp_compare */
Georg Brandl347b3002006-03-30 11:57:00 +00004096 string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00004097 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004098 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00004099 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004100 (hashfunc)string_hash, /* tp_hash */
4101 0, /* tp_call */
Georg Brandl347b3002006-03-30 11:57:00 +00004102 string_str, /* tp_str */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004103 PyObject_GenericGetAttr, /* tp_getattro */
4104 0, /* tp_setattro */
4105 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00004106 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neal Norwitzee3a1b52007-02-25 19:44:48 +00004107 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004108 string_doc, /* tp_doc */
4109 0, /* tp_traverse */
4110 0, /* tp_clear */
4111 (richcmpfunc)string_richcompare, /* tp_richcompare */
4112 0, /* tp_weaklistoffset */
4113 0, /* tp_iter */
4114 0, /* tp_iternext */
4115 string_methods, /* tp_methods */
4116 0, /* tp_members */
4117 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00004118 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00004119 0, /* tp_dict */
4120 0, /* tp_descr_get */
4121 0, /* tp_descr_set */
4122 0, /* tp_dictoffset */
4123 0, /* tp_init */
4124 0, /* tp_alloc */
4125 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00004126 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004127};
4128
4129void
Fred Drakeba096332000-07-09 07:04:36 +00004130PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004131{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004132 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00004133 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004134 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004135 if (w == NULL || !PyString_Check(*pv)) {
4136 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00004137 *pv = NULL;
4138 return;
4139 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004140 v = string_concat((PyStringObject *) *pv, w);
4141 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004142 *pv = v;
4143}
4144
Guido van Rossum013142a1994-08-30 08:19:36 +00004145void
Fred Drakeba096332000-07-09 07:04:36 +00004146PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00004147{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004148 PyString_Concat(pv, w);
4149 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00004150}
4151
4152
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004153/* The following function breaks the notion that strings are immutable:
4154 it changes the size of a string. We get away with this only if there
4155 is only one module referencing the object. You can also think of it
4156 as creating a new string object and destroying the old one, only
4157 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00004158 already be known to some other part of the code...
4159 Note that if there's not enough memory to resize the string, the original
4160 string object at *pv is deallocated, *pv is set to NULL, an "out of
4161 memory" exception is set, and -1 is returned. Else (on success) 0 is
4162 returned, and the value in *pv may or may not be the same as on input.
4163 As always, an extra byte is allocated for a trailing \0 byte (newsize
4164 does *not* include that), and a trailing \0 byte is stored.
4165*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004166
4167int
Martin v. Löwis18e16552006-02-15 17:27:45 +00004168_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004169{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004170 register PyObject *v;
4171 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00004172 v = *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004173 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
Armin Rigo618fbf52004-08-07 20:58:32 +00004174 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004175 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004176 Py_DECREF(v);
4177 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004178 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004179 }
Guido van Rossum921842f1990-11-18 17:30:23 +00004180 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00004181 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004182 _Py_ForgetReference(v);
4183 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00004184 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004185 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00004186 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004187 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00004188 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004189 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004190 _Py_NewReference(*pv);
4191 sv = (PyStringObject *) *pv;
Christian Heimese93237d2007-12-19 02:37:44 +00004192 Py_SIZE(sv) = newsize;
Guido van Rossum921842f1990-11-18 17:30:23 +00004193 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00004194 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004195 return 0;
4196}
Guido van Rossume5372401993-03-16 12:15:04 +00004197
4198/* Helpers for formatstring */
4199
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004200Py_LOCAL_INLINE(PyObject *)
Thomas Wouters977485d2006-02-16 15:59:12 +00004201getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00004202{
Thomas Wouters977485d2006-02-16 15:59:12 +00004203 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00004204 if (argidx < arglen) {
4205 (*p_argidx)++;
4206 if (arglen < 0)
4207 return args;
4208 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004209 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00004210 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004211 PyErr_SetString(PyExc_TypeError,
4212 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00004213 return NULL;
4214}
4215
/* Bit flags recording which flag characters appeared in a conversion
 * specifier; they are OR-ed together into one int.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
4228
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004229Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004230formatfloat(char *buf, size_t buflen, int flags,
4231 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004232{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004233 /* fmt = '%#.' + `prec` + `type`
4234 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00004235 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00004236 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004237 x = PyFloat_AsDouble(v);
4238 if (x == -1.0 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004239 PyErr_Format(PyExc_TypeError, "float argument required, "
Christian Heimese93237d2007-12-19 02:37:44 +00004240 "not %.200s", Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004241 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004242 }
Guido van Rossume5372401993-03-16 12:15:04 +00004243 if (prec < 0)
4244 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00004245 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4246 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004247 /* Worst case length calc to ensure no buffer overrun:
4248
4249 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004250 fmt = %#.<prec>g
4251 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004252 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004253 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004254
4255 'f' formats:
4256 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4257 len = 1 + 50 + 1 + prec = 52 + prec
4258
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004259 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00004260 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004261
4262 */
Georg Brandl7c3b50d2007-07-12 08:38:00 +00004263 if (((type == 'g' || type == 'G') &&
4264 buflen <= (size_t)10 + (size_t)prec) ||
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004265 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004266 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00004267 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004268 return -1;
4269 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00004270 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4271 (flags&F_ALT) ? "#" : "",
4272 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00004273 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004274 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004275}
4276
Tim Peters38fd5b62000-09-21 05:43:11 +00004277/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4278 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4279 * Python's regular ints.
4280 * Return value: a new PyString*, or NULL if error.
4281 * . *pbuf is set to point into it,
4282 * *plen set to the # of chars following that.
4283 * Caller must decref it when done using pbuf.
4284 * The string starting at *pbuf is of the form
4285 * "-"? ("0x" | "0X")? digit+
4286 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004287 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00004288 * There will be at least prec digits, zero-filled on the left if
4289 * necessary to get that many.
4290 * val object to be converted
4291 * flags bitmask of format flags; only F_ALT is looked at
4292 * prec minimum number of digits; 0-fill on left if needed
4293 * type a character in [duoxX]; u acts the same as d
4294 *
4295 * CAUTION: o, x and X conversions on regular ints can never
4296 * produce a '-' sign, but can for Python's unbounded ints.
4297 */
4298PyObject*
4299_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4300 char **pbuf, int *plen)
4301{
4302 PyObject *result = NULL;
4303 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004304 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00004305 int sign; /* 1 if '-', else 0 */
4306 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00004307 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004308 int numdigits; /* len == numnondigits + numdigits */
4309 int numnondigits = 0;
4310
4311 switch (type) {
4312 case 'd':
4313 case 'u':
Christian Heimese93237d2007-12-19 02:37:44 +00004314 result = Py_TYPE(val)->tp_str(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004315 break;
4316 case 'o':
Christian Heimese93237d2007-12-19 02:37:44 +00004317 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004318 break;
4319 case 'x':
4320 case 'X':
4321 numnondigits = 2;
Christian Heimese93237d2007-12-19 02:37:44 +00004322 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
Tim Peters38fd5b62000-09-21 05:43:11 +00004323 break;
4324 default:
4325 assert(!"'type' not in [duoxX]");
4326 }
4327 if (!result)
4328 return NULL;
4329
Neal Norwitz56423e52006-08-13 18:11:08 +00004330 buf = PyString_AsString(result);
Georg Brandl26a07b52006-08-14 20:25:39 +00004331 if (!buf) {
4332 Py_DECREF(result);
Neal Norwitz56423e52006-08-13 18:11:08 +00004333 return NULL;
Georg Brandl26a07b52006-08-14 20:25:39 +00004334 }
Neal Norwitz56423e52006-08-13 18:11:08 +00004335
Tim Peters38fd5b62000-09-21 05:43:11 +00004336 /* To modify the string in-place, there can only be one reference. */
Christian Heimese93237d2007-12-19 02:37:44 +00004337 if (Py_REFCNT(result) != 1) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004338 PyErr_BadInternalCall();
4339 return NULL;
4340 }
Martin v. Löwis725507b2006-03-07 12:08:51 +00004341 llen = PyString_Size(result);
Armin Rigo7ccbca92006-10-04 12:17:45 +00004342 if (llen > INT_MAX) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004343 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4344 return NULL;
4345 }
4346 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004347 if (buf[len-1] == 'L') {
4348 --len;
4349 buf[len] = '\0';
4350 }
4351 sign = buf[0] == '-';
4352 numnondigits += sign;
4353 numdigits = len - numnondigits;
4354 assert(numdigits > 0);
4355
Tim Petersfff53252001-04-12 18:38:48 +00004356 /* Get rid of base marker unless F_ALT */
4357 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00004358 /* Need to skip 0x, 0X or 0. */
4359 int skipped = 0;
4360 switch (type) {
4361 case 'o':
4362 assert(buf[sign] == '0');
4363 /* If 0 is only digit, leave it alone. */
4364 if (numdigits > 1) {
4365 skipped = 1;
4366 --numdigits;
4367 }
4368 break;
4369 case 'x':
4370 case 'X':
4371 assert(buf[sign] == '0');
4372 assert(buf[sign + 1] == 'x');
4373 skipped = 2;
4374 numnondigits -= 2;
4375 break;
4376 }
4377 if (skipped) {
4378 buf += skipped;
4379 len -= skipped;
4380 if (sign)
4381 buf[0] = '-';
4382 }
4383 assert(len == numnondigits + numdigits);
4384 assert(numdigits > 0);
4385 }
4386
4387 /* Fill with leading zeroes to meet minimum width. */
4388 if (prec > numdigits) {
4389 PyObject *r1 = PyString_FromStringAndSize(NULL,
4390 numnondigits + prec);
4391 char *b1;
4392 if (!r1) {
4393 Py_DECREF(result);
4394 return NULL;
4395 }
4396 b1 = PyString_AS_STRING(r1);
4397 for (i = 0; i < numnondigits; ++i)
4398 *b1++ = *buf++;
4399 for (i = 0; i < prec - numdigits; i++)
4400 *b1++ = '0';
4401 for (i = 0; i < numdigits; i++)
4402 *b1++ = *buf++;
4403 *b1 = '\0';
4404 Py_DECREF(result);
4405 result = r1;
4406 buf = PyString_AS_STRING(result);
4407 len = numnondigits + prec;
4408 }
4409
4410 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00004411 if (type == 'X') {
4412 /* Need to convert all lower case letters to upper case.
4413 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00004414 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00004415 if (buf[i] >= 'a' && buf[i] <= 'x')
4416 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00004417 }
4418 *pbuf = buf;
4419 *plen = len;
4420 return result;
4421}
4422
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004423Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004424formatint(char *buf, size_t buflen, int flags,
4425 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004426{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004427 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00004428 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4429 + 1 + 1 = 24 */
4430 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004431 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004432 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004433
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004434 x = PyInt_AsLong(v);
4435 if (x == -1 && PyErr_Occurred()) {
Georg Brandl283a1352006-11-19 08:48:30 +00004436 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
Christian Heimese93237d2007-12-19 02:37:44 +00004437 Py_TYPE(v)->tp_name);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004438 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00004439 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004440 if (x < 0 && type == 'u') {
4441 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00004442 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004443 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4444 sign = "-";
4445 else
4446 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00004447 if (prec < 0)
4448 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004449
4450 if ((flags & F_ALT) &&
4451 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004452 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004453 * of issues that cause pain:
4454 * - when 0 is being converted, the C standard leaves off
4455 * the '0x' or '0X', which is inconsistent with other
4456 * %#x/%#X conversions and inconsistent with Python's
4457 * hex() function
4458 * - there are platforms that violate the standard and
4459 * convert 0 with the '0x' or '0X'
4460 * (Metrowerks, Compaq Tru64)
4461 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004462 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004463 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004464 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004465 * We can achieve the desired consistency by inserting our
4466 * own '0x' or '0X' prefix, and substituting %x/%X in place
4467 * of %#x/%#X.
4468 *
4469 * Note that this is the same approach as used in
4470 * formatint() in unicodeobject.c
4471 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004472 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4473 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004474 }
4475 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004476 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4477 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004478 prec, type);
4479 }
4480
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004481 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4482 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00004483 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004484 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004485 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004486 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004487 return -1;
4488 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004489 if (sign[0])
4490 PyOS_snprintf(buf, buflen, fmt, -x);
4491 else
4492 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00004493 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00004494}
4495
Fredrik Lundhc2d29c52006-05-27 14:58:20 +00004496Py_LOCAL_INLINE(int)
Fred Drakeba096332000-07-09 07:04:36 +00004497formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00004498{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004499 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004500 if (PyString_Check(v)) {
4501 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004502 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004503 }
4504 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004505 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004506 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004507 }
4508 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004509 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00004510}
4511
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer into which floats, ints
   and chars are formatted.  XXX This is a magic number.  Each formatting
   routine checks bounds so the buffer cannot overflow, but allocating a
   buffer of the appropriate size for each format might be a better
   solution.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00004521
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004522PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004523PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00004524{
4525 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004526 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004527 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00004528 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004529 PyObject *result, *orig_args;
4530#ifdef Py_USING_UNICODE
4531 PyObject *v, *w;
4532#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004533 PyObject *dict = NULL;
4534 if (format == NULL || !PyString_Check(format) || args == NULL) {
4535 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00004536 return NULL;
4537 }
Guido van Rossum90daa872000-04-10 13:47:21 +00004538 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004539 fmt = PyString_AS_STRING(format);
4540 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004541 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004542 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00004543 if (result == NULL)
4544 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004545 res = PyString_AsString(result);
4546 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00004547 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00004548 argidx = 0;
4549 }
4550 else {
4551 arglen = -1;
4552 argidx = -2;
4553 }
Christian Heimese93237d2007-12-19 02:37:44 +00004554 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
Neal Norwitz80a1bf42002-11-12 23:01:12 +00004555 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00004556 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00004557 while (--fmtcnt >= 0) {
4558 if (*fmt != '%') {
4559 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004560 rescnt = fmtcnt + 100;
4561 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004562 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004563 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004564 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004565 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00004566 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004567 }
4568 *res++ = *fmt++;
4569 }
4570 else {
4571 /* Got a format specifier */
4572 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004573 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00004574 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00004575 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00004576 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004577 PyObject *v = NULL;
4578 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004579 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00004580 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004581 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004582 char formatbuf[FORMATBUFLEN];
4583 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004584#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004585 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00004586 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004587#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004588
Guido van Rossumda9c2711996-12-05 21:58:58 +00004589 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00004590 if (*fmt == '(') {
4591 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004592 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004593 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00004594 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004595
4596 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004597 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004598 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00004599 goto error;
4600 }
4601 ++fmt;
4602 --fmtcnt;
4603 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00004604 /* Skip over balanced parentheses */
4605 while (pcount > 0 && --fmtcnt >= 0) {
4606 if (*fmt == ')')
4607 --pcount;
4608 else if (*fmt == '(')
4609 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00004610 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00004611 }
4612 keylen = fmt - keystart - 1;
4613 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004614 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00004615 "incomplete format key");
4616 goto error;
4617 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004618 key = PyString_FromStringAndSize(keystart,
4619 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00004620 if (key == NULL)
4621 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00004622 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004623 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00004624 args_owned = 0;
4625 }
4626 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004627 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00004628 if (args == NULL) {
4629 goto error;
4630 }
Guido van Rossum993952b1996-05-21 22:44:20 +00004631 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00004632 arglen = -1;
4633 argidx = -2;
4634 }
Guido van Rossume5372401993-03-16 12:15:04 +00004635 while (--fmtcnt >= 0) {
4636 switch (c = *fmt++) {
4637 case '-': flags |= F_LJUST; continue;
4638 case '+': flags |= F_SIGN; continue;
4639 case ' ': flags |= F_BLANK; continue;
4640 case '#': flags |= F_ALT; continue;
4641 case '0': flags |= F_ZERO; continue;
4642 }
4643 break;
4644 }
4645 if (c == '*') {
4646 v = getnextarg(args, arglen, &argidx);
4647 if (v == NULL)
4648 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004649 if (!PyInt_Check(v)) {
4650 PyErr_SetString(PyExc_TypeError,
4651 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004652 goto error;
4653 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004654 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004655 if (width < 0) {
4656 flags |= F_LJUST;
4657 width = -width;
4658 }
Guido van Rossume5372401993-03-16 12:15:04 +00004659 if (--fmtcnt >= 0)
4660 c = *fmt++;
4661 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004662 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004663 width = c - '0';
4664 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004665 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004666 if (!isdigit(c))
4667 break;
4668 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004669 PyErr_SetString(
4670 PyExc_ValueError,
4671 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004672 goto error;
4673 }
4674 width = width*10 + (c - '0');
4675 }
4676 }
4677 if (c == '.') {
4678 prec = 0;
4679 if (--fmtcnt >= 0)
4680 c = *fmt++;
4681 if (c == '*') {
4682 v = getnextarg(args, arglen, &argidx);
4683 if (v == NULL)
4684 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004685 if (!PyInt_Check(v)) {
4686 PyErr_SetString(
4687 PyExc_TypeError,
4688 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004689 goto error;
4690 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004691 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004692 if (prec < 0)
4693 prec = 0;
4694 if (--fmtcnt >= 0)
4695 c = *fmt++;
4696 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004697 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004698 prec = c - '0';
4699 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004700 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004701 if (!isdigit(c))
4702 break;
4703 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004704 PyErr_SetString(
4705 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004706 "prec too big");
4707 goto error;
4708 }
4709 prec = prec*10 + (c - '0');
4710 }
4711 }
4712 } /* prec */
4713 if (fmtcnt >= 0) {
4714 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004715 if (--fmtcnt >= 0)
4716 c = *fmt++;
4717 }
4718 }
4719 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004720 PyErr_SetString(PyExc_ValueError,
4721 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004722 goto error;
4723 }
4724 if (c != '%') {
4725 v = getnextarg(args, arglen, &argidx);
4726 if (v == NULL)
4727 goto error;
4728 }
4729 sign = 0;
4730 fill = ' ';
4731 switch (c) {
4732 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004733 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004734 len = 1;
4735 break;
4736 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004737#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004738 if (PyUnicode_Check(v)) {
4739 fmt = fmt_start;
4740 argidx = argidx_start;
4741 goto unicode;
4742 }
Georg Brandld45014b2005-10-01 17:06:00 +00004743#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004744 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004745#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004746 if (temp != NULL && PyUnicode_Check(temp)) {
4747 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004748 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004749 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004750 goto unicode;
4751 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004752#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004753 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004754 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004755 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004756 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004757 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004758 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004759 if (!PyString_Check(temp)) {
4760 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004761 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004762 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004763 goto error;
4764 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004765 pbuf = PyString_AS_STRING(temp);
4766 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004767 if (prec >= 0 && len > prec)
4768 len = prec;
4769 break;
4770 case 'i':
4771 case 'd':
4772 case 'u':
4773 case 'o':
4774 case 'x':
4775 case 'X':
4776 if (c == 'i')
4777 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004778 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004779 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004780 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004781 prec, c, &pbuf, &ilen);
4782 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004783 if (!temp)
4784 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004785 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004786 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004787 else {
4788 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004789 len = formatint(pbuf,
4790 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004791 flags, prec, c, v);
4792 if (len < 0)
4793 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004794 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004795 }
4796 if (flags & F_ZERO)
4797 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004798 break;
4799 case 'e':
4800 case 'E':
4801 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004802 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004803 case 'g':
4804 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004805 if (c == 'F')
4806 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004807 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004808 len = formatfloat(pbuf, sizeof(formatbuf),
4809 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004810 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004811 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004812 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004813 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004814 fill = '0';
4815 break;
4816 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004817#ifdef Py_USING_UNICODE
4818 if (PyUnicode_Check(v)) {
4819 fmt = fmt_start;
4820 argidx = argidx_start;
4821 goto unicode;
4822 }
4823#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004824 pbuf = formatbuf;
4825 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004826 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004827 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004828 break;
4829 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004830 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004831 "unsupported format character '%c' (0x%x) "
Armin Rigo7ccbca92006-10-04 12:17:45 +00004832 "at index %zd",
Guido van Rossumefc11882002-09-12 14:43:41 +00004833 c, c,
Armin Rigo7ccbca92006-10-04 12:17:45 +00004834 (Py_ssize_t)(fmt - 1 -
4835 PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004836 goto error;
4837 }
4838 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004839 if (*pbuf == '-' || *pbuf == '+') {
4840 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004841 len--;
4842 }
4843 else if (flags & F_SIGN)
4844 sign = '+';
4845 else if (flags & F_BLANK)
4846 sign = ' ';
4847 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004848 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004849 }
4850 if (width < len)
4851 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004852 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004853 reslen -= rescnt;
4854 rescnt = width + fmtcnt + 100;
4855 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004856 if (reslen < 0) {
4857 Py_DECREF(result);
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004858 Py_XDECREF(temp);
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004859 return PyErr_NoMemory();
4860 }
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004861 if (_PyString_Resize(&result, reslen) < 0) {
4862 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004863 return NULL;
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004864 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004865 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004866 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004867 }
4868 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004869 if (fill != ' ')
4870 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004871 rescnt--;
4872 if (width > len)
4873 width--;
4874 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004875 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4876 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004877 assert(pbuf[1] == c);
4878 if (fill != ' ') {
4879 *res++ = *pbuf++;
4880 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004881 }
Tim Petersfff53252001-04-12 18:38:48 +00004882 rescnt -= 2;
4883 width -= 2;
4884 if (width < 0)
4885 width = 0;
4886 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004887 }
4888 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004889 do {
4890 --rescnt;
4891 *res++ = fill;
4892 } while (--width > len);
4893 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004894 if (fill == ' ') {
4895 if (sign)
4896 *res++ = sign;
4897 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004898 (c == 'x' || c == 'X')) {
4899 assert(pbuf[0] == '0');
4900 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004901 *res++ = *pbuf++;
4902 *res++ = *pbuf++;
4903 }
4904 }
Fredrik Lundh80f8e802006-05-28 12:06:46 +00004905 Py_MEMCPY(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004906 res += len;
4907 rescnt -= len;
4908 while (--width >= len) {
4909 --rescnt;
4910 *res++ = ' ';
4911 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004912 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004913 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004914 "not all arguments converted during string formatting");
Georg Brandl10a4b0e2007-02-26 13:51:29 +00004915 Py_XDECREF(temp);
Guido van Rossum013142a1994-08-30 08:19:36 +00004916 goto error;
4917 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004918 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004919 } /* '%' */
4920 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004921 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004922 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004923 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004924 goto error;
4925 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004926 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004927 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004928 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004929 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004930 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004931
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004932#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004933 unicode:
4934 if (args_owned) {
4935 Py_DECREF(args);
4936 args_owned = 0;
4937 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004938 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004939 if (PyTuple_Check(orig_args) && argidx > 0) {
4940 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004941 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004942 v = PyTuple_New(n);
4943 if (v == NULL)
4944 goto error;
4945 while (--n >= 0) {
4946 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4947 Py_INCREF(w);
4948 PyTuple_SET_ITEM(v, n, w);
4949 }
4950 args = v;
4951 } else {
4952 Py_INCREF(orig_args);
4953 args = orig_args;
4954 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004955 args_owned = 1;
4956 /* Take what we have of the result and let the Unicode formatting
4957 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004958 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004959 if (_PyString_Resize(&result, rescnt))
4960 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004961 fmtcnt = PyString_GET_SIZE(format) - \
4962 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004963 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4964 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004965 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004966 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004967 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004968 if (v == NULL)
4969 goto error;
4970 /* Paste what we have (result) to what the Unicode formatting
4971 function returned (v) and return the result (or error) */
4972 w = PyUnicode_Concat(result, v);
4973 Py_DECREF(result);
4974 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004975 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004976 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004977#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004978
Guido van Rossume5372401993-03-16 12:15:04 +00004979 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004980 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004981 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004982 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004983 }
Guido van Rossume5372401993-03-16 12:15:04 +00004984 return NULL;
4985}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004986
Guido van Rossum2a61e741997-01-18 07:55:05 +00004987void
Fred Drakeba096332000-07-09 07:04:36 +00004988PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004989{
4990 register PyStringObject *s = (PyStringObject *)(*p);
4991 PyObject *t;
4992 if (s == NULL || !PyString_Check(s))
4993 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004994 /* If it's a string subclass, we don't really know what putting
4995 it in the interned dict might do. */
4996 if (!PyString_CheckExact(s))
4997 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004998 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004999 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005000 if (interned == NULL) {
5001 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005002 if (interned == NULL) {
5003 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00005004 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005005 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00005006 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005007 t = PyDict_GetItem(interned, (PyObject *)s);
5008 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00005009 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005010 Py_DECREF(*p);
5011 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005012 return;
5013 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005014
Armin Rigo79f7ad22004-08-07 19:27:39 +00005015 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005016 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005017 return;
5018 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005019 /* The two references in interned are not counted by refcnt.
5020 The string deallocator will take care of this */
Christian Heimese93237d2007-12-19 02:37:44 +00005021 Py_REFCNT(s) -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00005022 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00005023}
5024
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005025void
5026PyString_InternImmortal(PyObject **p)
5027{
5028 PyString_InternInPlace(p);
5029 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5030 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5031 Py_INCREF(*p);
5032 }
5033}
5034
Guido van Rossum2a61e741997-01-18 07:55:05 +00005035
5036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00005037PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00005038{
5039 PyObject *s = PyString_FromString(cp);
5040 if (s == NULL)
5041 return NULL;
5042 PyString_InternInPlace(&s);
5043 return s;
5044}
5045
Guido van Rossum8cf04761997-08-02 02:57:45 +00005046void
Fred Drakeba096332000-07-09 07:04:36 +00005047PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00005048{
5049 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005050 for (i = 0; i < UCHAR_MAX + 1; i++) {
5051 Py_XDECREF(characters[i]);
5052 characters[i] = NULL;
5053 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00005054 Py_XDECREF(nullstring);
5055 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00005056}
Barry Warsawa903ad982001-02-23 16:40:48 +00005057
Barry Warsawa903ad982001-02-23 16:40:48 +00005058void _Py_ReleaseInternedStrings(void)
5059{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005060 PyObject *keys;
5061 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00005062 Py_ssize_t i, n;
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005063 Py_ssize_t immortal_size = 0, mortal_size = 0;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005064
5065 if (interned == NULL || !PyDict_Check(interned))
5066 return;
5067 keys = PyDict_Keys(interned);
5068 if (keys == NULL || !PyList_Check(keys)) {
5069 PyErr_Clear();
5070 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00005071 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005072
5073 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5074 detector, interned strings are not forcibly deallocated; rather, we
5075 give them their stolen references back, and then clear and DECREF
5076 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00005077
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005078 n = PyList_GET_SIZE(keys);
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005079 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5080 n);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005081 for (i = 0; i < n; i++) {
5082 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5083 switch (s->ob_sstate) {
5084 case SSTATE_NOT_INTERNED:
5085 /* XXX Shouldn't happen */
5086 break;
5087 case SSTATE_INTERNED_IMMORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005088 Py_REFCNT(s) += 1;
5089 immortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005090 break;
5091 case SSTATE_INTERNED_MORTAL:
Christian Heimese93237d2007-12-19 02:37:44 +00005092 Py_REFCNT(s) += 2;
5093 mortal_size += Py_SIZE(s);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005094 break;
5095 default:
5096 Py_FatalError("Inconsistent interned string state.");
5097 }
5098 s->ob_sstate = SSTATE_NOT_INTERNED;
5099 }
Neal Norwitz1c1a1c52007-02-25 15:52:27 +00005100 fprintf(stderr, "total size of all interned strings: "
5101 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5102 "mortal/immortal\n", mortal_size, immortal_size);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00005103 Py_DECREF(keys);
5104 PyDict_Clear(interned);
5105 Py_DECREF(interned);
5106 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00005107}