blob: e37b579ec560e0ef467dbf77c4b606fead1ef63a [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
7
8#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000012static PyStringObject *characters[UCHAR_MAX + 1];
13static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000014
15/* This dictionary holds all interned strings. Note that references to
16 strings in this dictionary are *not* counted in the string's ob_refcnt.
17 When the interned string reaches a refcnt of 0 the string deallocation
18 function will delete the reference from this dictionary.
19
20 Another way to look at this is that to say that the actual reference
21 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
22*/
23static PyObject *interned;
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
51PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000053{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000054 register PyStringObject *op;
55 if (size < 0) {
56 PyErr_SetString(PyExc_SystemError,
57 "Negative size passed to PyString_FromStringAndSize");
58 return NULL;
59 }
60 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000061#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000062 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000063#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000064 Py_INCREF(op);
65 return (PyObject *)op;
66 }
67 if (size == 1 && str != NULL &&
68 (op = characters[*str & UCHAR_MAX]) != NULL)
69 {
Christian Heimes44720832008-05-26 13:01:01 +000070#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000071 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000072#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000073 Py_INCREF(op);
74 return (PyObject *)op;
75 }
Christian Heimes44720832008-05-26 13:01:01 +000076
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000077 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
78 PyErr_SetString(PyExc_OverflowError, "string is too large");
79 return NULL;
80 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000081
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000082 /* Inline PyObject_NewVar */
83 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
84 if (op == NULL)
85 return PyErr_NoMemory();
86 PyObject_INIT_VAR(op, &PyString_Type, size);
87 op->ob_shash = -1;
88 op->ob_sstate = SSTATE_NOT_INTERNED;
89 if (str != NULL)
90 Py_MEMCPY(op->ob_sval, str, size);
91 op->ob_sval[size] = '\0';
92 /* share short strings */
93 if (size == 0) {
94 PyObject *t = (PyObject *)op;
95 PyString_InternInPlace(&t);
96 op = (PyStringObject *)t;
97 nullstring = op;
98 Py_INCREF(op);
99 } else if (size == 1 && str != NULL) {
100 PyObject *t = (PyObject *)op;
101 PyString_InternInPlace(&t);
102 op = (PyStringObject *)t;
103 characters[*str & UCHAR_MAX] = op;
104 Py_INCREF(op);
105 }
106 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000107}
108
Christian Heimes44720832008-05-26 13:01:01 +0000109PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000110PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000111{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000112 register size_t size;
113 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000114
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000115 assert(str != NULL);
116 size = strlen(str);
117 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
118 PyErr_SetString(PyExc_OverflowError,
119 "string is too long for a Python string");
120 return NULL;
121 }
122 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000123#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000124 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000125#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
128 }
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000130#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000131 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000132#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000133 Py_INCREF(op);
134 return (PyObject *)op;
135 }
Christian Heimes44720832008-05-26 13:01:01 +0000136
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000137 /* Inline PyObject_NewVar */
138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
139 if (op == NULL)
140 return PyErr_NoMemory();
141 PyObject_INIT_VAR(op, &PyString_Type, size);
142 op->ob_shash = -1;
143 op->ob_sstate = SSTATE_NOT_INTERNED;
144 Py_MEMCPY(op->ob_sval, str, size+1);
145 /* share short strings */
146 if (size == 0) {
147 PyObject *t = (PyObject *)op;
148 PyString_InternInPlace(&t);
149 op = (PyStringObject *)t;
150 nullstring = op;
151 Py_INCREF(op);
152 } else if (size == 1) {
153 PyObject *t = (PyObject *)op;
154 PyString_InternInPlace(&t);
155 op = (PyStringObject *)t;
156 characters[*str & UCHAR_MAX] = op;
157 Py_INCREF(op);
158 }
159 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000160}
161
Christian Heimes44720832008-05-26 13:01:01 +0000162PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000163PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000164{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000165 va_list count;
166 Py_ssize_t n = 0;
167 const char* f;
168 char *s;
169 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000170
Christian Heimes44720832008-05-26 13:01:01 +0000171#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000172 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000173#else
174#ifdef __va_copy
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000175 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000176#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000177 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000178#endif
179#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000180 /* step 1: figure out how large a buffer we need */
181 for (f = format; *f; f++) {
182 if (*f == '%') {
183 const char* p = f;
184 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
185 ;
Christian Heimes44720832008-05-26 13:01:01 +0000186
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
188 * they don't affect the amount of space we reserve.
189 */
190 if ((*f == 'l' || *f == 'z') &&
191 (f[1] == 'd' || f[1] == 'u'))
192 ++f;
Christian Heimes44720832008-05-26 13:01:01 +0000193
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000194 switch (*f) {
195 case 'c':
196 (void)va_arg(count, int);
197 /* fall through... */
198 case '%':
199 n++;
200 break;
201 case 'd': case 'u': case 'i': case 'x':
202 (void) va_arg(count, int);
203 /* 20 bytes is enough to hold a 64-bit
204 integer. Decimal takes the most space.
205 This isn't enough for octal. */
206 n += 20;
207 break;
208 case 's':
209 s = va_arg(count, char*);
210 n += strlen(s);
211 break;
212 case 'p':
213 (void) va_arg(count, int);
214 /* maximum 64-bit pointer representation:
215 * 0xffffffffffffffff
216 * so 19 characters is enough.
217 * XXX I count 18 -- what's the extra for?
218 */
219 n += 19;
220 break;
221 default:
222 /* if we stumble upon an unknown
223 formatting code, copy the rest of
224 the format string to the output
225 string. (we cannot just skip the
226 code, since there's no way to know
227 what's in the argument list) */
228 n += strlen(p);
229 goto expand;
230 }
231 } else
232 n++;
233 }
Christian Heimes44720832008-05-26 13:01:01 +0000234 expand:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000235 /* step 2: fill the buffer */
236 /* Since we've analyzed how much space we need for the worst case,
237 use sprintf directly instead of the slower PyOS_snprintf. */
238 string = PyString_FromStringAndSize(NULL, n);
239 if (!string)
240 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000241
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000242 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000243
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000244 for (f = format; *f; f++) {
245 if (*f == '%') {
246 const char* p = f++;
247 Py_ssize_t i;
248 int longflag = 0;
249 int size_tflag = 0;
250 /* parse the width.precision part (we're only
251 interested in the precision value, if any) */
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 if (*f == '.') {
256 f++;
257 n = 0;
258 while (isdigit(Py_CHARMASK(*f)))
259 n = (n*10) + *f++ - '0';
260 }
261 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
262 f++;
263 /* handle the long flag, but only for %ld and %lu.
264 others can be added when necessary. */
265 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
266 longflag = 1;
267 ++f;
268 }
269 /* handle the size_t flag. */
270 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
271 size_tflag = 1;
272 ++f;
273 }
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000275 switch (*f) {
276 case 'c':
277 *s++ = va_arg(vargs, int);
278 break;
279 case 'd':
280 if (longflag)
281 sprintf(s, "%ld", va_arg(vargs, long));
282 else if (size_tflag)
283 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
284 va_arg(vargs, Py_ssize_t));
285 else
286 sprintf(s, "%d", va_arg(vargs, int));
287 s += strlen(s);
288 break;
289 case 'u':
290 if (longflag)
291 sprintf(s, "%lu",
292 va_arg(vargs, unsigned long));
293 else if (size_tflag)
294 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
295 va_arg(vargs, size_t));
296 else
297 sprintf(s, "%u",
298 va_arg(vargs, unsigned int));
299 s += strlen(s);
300 break;
301 case 'i':
302 sprintf(s, "%i", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 'x':
306 sprintf(s, "%x", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 's':
310 p = va_arg(vargs, char*);
311 i = strlen(p);
312 if (n > 0 && i > n)
313 i = n;
314 Py_MEMCPY(s, p, i);
315 s += i;
316 break;
317 case 'p':
318 sprintf(s, "%p", va_arg(vargs, void*));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (s[1] == 'X')
321 s[1] = 'x';
322 else if (s[1] != 'x') {
323 memmove(s+2, s, strlen(s)+1);
324 s[0] = '0';
325 s[1] = 'x';
326 }
327 s += strlen(s);
328 break;
329 case '%':
330 *s++ = '%';
331 break;
332 default:
333 strcpy(s, p);
334 s += strlen(s);
335 goto end;
336 }
337 } else
338 *s++ = *f;
339 }
Christian Heimes44720832008-05-26 13:01:01 +0000340
341 end:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000342 _PyString_Resize(&string, s - PyString_AS_STRING(string));
343 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000344}
345
346PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000347PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000348{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000349 PyObject* ret;
350 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000351
352#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000353 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000354#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000355 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000356#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000357 ret = PyString_FromFormatV(format, vargs);
358 va_end(vargs);
359 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000360}
361
362
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000363PyObject *PyString_Decode(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000364 Py_ssize_t size,
365 const char *encoding,
366 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000367{
368 PyObject *v, *str;
369
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000370 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000371 if (str == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000372 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000373 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000374 Py_DECREF(str);
375 return v;
376}
377
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000378PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000379 const char *encoding,
380 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000381{
382 PyObject *v;
383
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000384 if (!PyString_Check(str)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000385 PyErr_BadArgument();
386 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000387 }
388
Christian Heimes44720832008-05-26 13:01:01 +0000389 if (encoding == NULL) {
390#ifdef Py_USING_UNICODE
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000391 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000392#else
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000393 PyErr_SetString(PyExc_ValueError, "no encoding specified");
394 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000395#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000396 }
Christian Heimes44720832008-05-26 13:01:01 +0000397
398 /* Decode via the codec registry */
399 v = PyCodec_Decode(str, encoding, errors);
400 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000401 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000402
403 return v;
404
405 onError:
406 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000407}
408
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000409PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000410 const char *encoding,
411 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000412{
Christian Heimes44720832008-05-26 13:01:01 +0000413 PyObject *v;
414
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000415 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000416 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000417 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000418
419#ifdef Py_USING_UNICODE
420 /* Convert Unicode to a string using the default encoding */
421 if (PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000422 PyObject *temp = v;
423 v = PyUnicode_AsEncodedString(v, NULL, NULL);
424 Py_DECREF(temp);
425 if (v == NULL)
426 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000427 }
Christian Heimes44720832008-05-26 13:01:01 +0000428#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000429 if (!PyString_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000430 PyErr_Format(PyExc_TypeError,
431 "decoder did not return a string object (type=%.400s)",
432 Py_TYPE(v)->tp_name);
433 Py_DECREF(v);
434 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000435 }
Christian Heimes44720832008-05-26 13:01:01 +0000436
437 return v;
438
439 onError:
440 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000441}
442
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000443PyObject *PyString_Encode(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000444 Py_ssize_t size,
445 const char *encoding,
446 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000447{
Christian Heimes44720832008-05-26 13:01:01 +0000448 PyObject *v, *str;
449
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000450 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000451 if (str == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000452 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000453 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000454 Py_DECREF(str);
455 return v;
456}
457
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000458PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000459 const char *encoding,
460 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000461{
462 PyObject *v;
463
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000464 if (!PyString_Check(str)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000465 PyErr_BadArgument();
466 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000467 }
468
469 if (encoding == NULL) {
470#ifdef Py_USING_UNICODE
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000471 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000472#else
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000473 PyErr_SetString(PyExc_ValueError, "no encoding specified");
474 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000475#endif
476 }
477
478 /* Encode via the codec registry */
479 v = PyCodec_Encode(str, encoding, errors);
480 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000481 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000482
483 return v;
484
485 onError:
486 return NULL;
487}
488
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000489PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000490 const char *encoding,
491 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000492{
493 PyObject *v;
494
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000495 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000496 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000497 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000498
499#ifdef Py_USING_UNICODE
500 /* Convert Unicode to a string using the default encoding */
501 if (PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000502 PyObject *temp = v;
503 v = PyUnicode_AsEncodedString(v, NULL, NULL);
504 Py_DECREF(temp);
505 if (v == NULL)
506 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000507 }
508#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000509 if (!PyString_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000510 PyErr_Format(PyExc_TypeError,
511 "encoder did not return a string object (type=%.400s)",
512 Py_TYPE(v)->tp_name);
513 Py_DECREF(v);
514 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000515 }
516
517 return v;
518
519 onError:
520 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000521}
522
523static void
Christian Heimes44720832008-05-26 13:01:01 +0000524string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000525{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000526 switch (PyString_CHECK_INTERNED(op)) {
527 case SSTATE_NOT_INTERNED:
528 break;
Christian Heimes44720832008-05-26 13:01:01 +0000529
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000530 case SSTATE_INTERNED_MORTAL:
531 /* revive dead object temporarily for DelItem */
532 Py_REFCNT(op) = 3;
533 if (PyDict_DelItem(interned, op) != 0)
534 Py_FatalError(
535 "deletion of interned string failed");
536 break;
Christian Heimes44720832008-05-26 13:01:01 +0000537
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000538 case SSTATE_INTERNED_IMMORTAL:
539 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000540
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000541 default:
542 Py_FatalError("Inconsistent interned string state.");
543 }
544 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000545}
546
Christian Heimes44720832008-05-26 13:01:01 +0000547/* Unescape a backslash-escaped string. If unicode is non-zero,
548 the string is a u-literal. If recode_encoding is non-zero,
549 the string is UTF-8 encoded and should be re-encoded in the
550 specified encoding. */
551
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000552PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000553 Py_ssize_t len,
554 const char *errors,
555 Py_ssize_t unicode,
556 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000557{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000558 int c;
559 char *p, *buf;
560 const char *end;
561 PyObject *v;
562 Py_ssize_t newlen = recode_encoding ? 4*len:len;
563 v = PyString_FromStringAndSize((char *)NULL, newlen);
564 if (v == NULL)
565 return NULL;
566 p = buf = PyString_AsString(v);
567 end = s + len;
568 while (s < end) {
569 if (*s != '\\') {
570 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000571#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000572 if (recode_encoding && (*s & 0x80)) {
573 PyObject *u, *w;
574 char *r;
575 const char* t;
576 Py_ssize_t rn;
577 t = s;
578 /* Decode non-ASCII bytes as UTF-8. */
579 while (t < end && (*t & 0x80)) t++;
580 u = PyUnicode_DecodeUTF8(s, t - s, errors);
581 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000583 /* Recode them in target encoding. */
584 w = PyUnicode_AsEncodedString(
585 u, recode_encoding, errors);
586 Py_DECREF(u);
587 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000588
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000589 /* Append bytes to output buffer. */
590 assert(PyString_Check(w));
591 r = PyString_AS_STRING(w);
592 rn = PyString_GET_SIZE(w);
593 Py_MEMCPY(p, r, rn);
594 p += rn;
595 Py_DECREF(w);
596 s = t;
597 } else {
598 *p++ = *s++;
599 }
Christian Heimes44720832008-05-26 13:01:01 +0000600#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000601 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000602#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000603 continue;
604 }
605 s++;
606 if (s==end) {
607 PyErr_SetString(PyExc_ValueError,
608 "Trailing \\ in string");
609 goto failed;
610 }
611 switch (*s++) {
612 /* XXX This assumes ASCII! */
613 case '\n': break;
614 case '\\': *p++ = '\\'; break;
615 case '\'': *p++ = '\''; break;
616 case '\"': *p++ = '\"'; break;
617 case 'b': *p++ = '\b'; break;
618 case 'f': *p++ = '\014'; break; /* FF */
619 case 't': *p++ = '\t'; break;
620 case 'n': *p++ = '\n'; break;
621 case 'r': *p++ = '\r'; break;
622 case 'v': *p++ = '\013'; break; /* VT */
623 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
624 case '0': case '1': case '2': case '3':
625 case '4': case '5': case '6': case '7':
626 c = s[-1] - '0';
627 if (s < end && '0' <= *s && *s <= '7') {
628 c = (c<<3) + *s++ - '0';
629 if (s < end && '0' <= *s && *s <= '7')
630 c = (c<<3) + *s++ - '0';
631 }
632 *p++ = c;
633 break;
634 case 'x':
635 if (s+1 < end &&
636 isxdigit(Py_CHARMASK(s[0])) &&
637 isxdigit(Py_CHARMASK(s[1])))
638 {
639 unsigned int x = 0;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x = c - '0';
644 else if (islower(c))
645 x = 10 + c - 'a';
646 else
647 x = 10 + c - 'A';
648 x = x << 4;
649 c = Py_CHARMASK(*s);
650 s++;
651 if (isdigit(c))
652 x += c - '0';
653 else if (islower(c))
654 x += 10 + c - 'a';
655 else
656 x += 10 + c - 'A';
657 *p++ = x;
658 break;
659 }
660 if (!errors || strcmp(errors, "strict") == 0) {
661 PyErr_SetString(PyExc_ValueError,
662 "invalid \\x escape");
663 goto failed;
664 }
665 if (strcmp(errors, "replace") == 0) {
666 *p++ = '?';
667 } else if (strcmp(errors, "ignore") == 0)
668 /* do nothing */;
669 else {
670 PyErr_Format(PyExc_ValueError,
671 "decoding error; "
672 "unknown error handling code: %.400s",
673 errors);
674 goto failed;
675 }
Christian Heimes44720832008-05-26 13:01:01 +0000676#ifndef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000677 case 'u':
678 case 'U':
679 case 'N':
680 if (unicode) {
681 PyErr_SetString(PyExc_ValueError,
682 "Unicode escapes not legal "
683 "when Unicode disabled");
684 goto failed;
685 }
Christian Heimes44720832008-05-26 13:01:01 +0000686#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000687 default:
688 *p++ = '\\';
689 s--;
690 goto non_esc; /* an arbitry number of unescaped
691 UTF-8 bytes may follow. */
692 }
693 }
694 if (p-buf < newlen)
695 _PyString_Resize(&v, p - buf);
696 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000697 failed:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000698 Py_DECREF(v);
699 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000700}
701
702/* -------------------------------------------------------------------- */
703/* object api */
704
Christian Heimes1a6387e2008-03-26 12:49:49 +0000705static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000706string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000707{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000708 char *s;
709 Py_ssize_t len;
710 if (PyString_AsStringAndSize(op, &s, &len))
711 return -1;
712 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000713}
714
Christian Heimes44720832008-05-26 13:01:01 +0000715static /*const*/ char *
716string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000717{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000718 char *s;
719 Py_ssize_t len;
720 if (PyString_AsStringAndSize(op, &s, &len))
721 return NULL;
722 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000723}
724
725Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000726PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000727{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000728 if (!PyString_Check(op))
729 return string_getsize(op);
730 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000731}
732
Christian Heimes44720832008-05-26 13:01:01 +0000733/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000734PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000735{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000736 if (!PyString_Check(op))
737 return string_getbuffer(op);
738 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000739}
740
741int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000742PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000743 register char **s,
744 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000745{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000746 if (s == NULL) {
747 PyErr_BadInternalCall();
748 return -1;
749 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000750
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000751 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000752#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000753 if (PyUnicode_Check(obj)) {
754 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
755 if (obj == NULL)
756 return -1;
757 }
758 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000759#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000760 {
761 PyErr_Format(PyExc_TypeError,
762 "expected string or Unicode object, "
763 "%.200s found", Py_TYPE(obj)->tp_name);
764 return -1;
765 }
766 }
Christian Heimes44720832008-05-26 13:01:01 +0000767
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000768 *s = PyString_AS_STRING(obj);
769 if (len != NULL)
770 *len = PyString_GET_SIZE(obj);
771 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
772 PyErr_SetString(PyExc_TypeError,
773 "expected string without null bytes");
774 return -1;
775 }
776 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000777}
778
Christian Heimes1a6387e2008-03-26 12:49:49 +0000779/* -------------------------------------------------------------------- */
780/* Methods */
781
Christian Heimes44720832008-05-26 13:01:01 +0000782#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000784
Christian Heimes1a6387e2008-03-26 12:49:49 +0000785#include "stringlib/count.h"
786#include "stringlib/find.h"
787#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000789#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000790#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792
793
794static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000797 Py_ssize_t i, str_len;
798 char c;
799 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000800
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000801 /* XXX Ought to check for interrupts when writing long strings */
802 if (! PyString_CheckExact(op)) {
803 int ret;
804 /* A str subclass may have its own __str__ method. */
805 op = (PyStringObject *) PyObject_Str((PyObject *)op);
806 if (op == NULL)
807 return -1;
808 ret = string_print(op, fp, flags);
809 Py_DECREF(op);
810 return ret;
811 }
812 if (flags & Py_PRINT_RAW) {
813 char *data = op->ob_sval;
814 Py_ssize_t size = Py_SIZE(op);
815 Py_BEGIN_ALLOW_THREADS
816 while (size > INT_MAX) {
817 /* Very long strings cannot be written atomically.
818 * But don't write exactly INT_MAX bytes at a time
819 * to avoid memory aligment issues.
820 */
821 const int chunk_size = INT_MAX & ~0x3FFF;
822 fwrite(data, 1, chunk_size, fp);
823 data += chunk_size;
824 size -= chunk_size;
825 }
Christian Heimes44720832008-05-26 13:01:01 +0000826#ifdef __VMS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000827 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000828#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000829 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000830#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000831 Py_END_ALLOW_THREADS
832 return 0;
833 }
Christian Heimes44720832008-05-26 13:01:01 +0000834
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000835 /* figure out which quote to use; single is preferred */
836 quote = '\'';
837 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
838 !memchr(op->ob_sval, '"', Py_SIZE(op)))
839 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000840
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000841 str_len = Py_SIZE(op);
842 Py_BEGIN_ALLOW_THREADS
843 fputc(quote, fp);
844 for (i = 0; i < str_len; i++) {
845 /* Since strings are immutable and the caller should have a
846 reference, accessing the interal buffer should not be an issue
847 with the GIL released. */
848 c = op->ob_sval[i];
849 if (c == quote || c == '\\')
850 fprintf(fp, "\\%c", c);
851 else if (c == '\t')
852 fprintf(fp, "\\t");
853 else if (c == '\n')
854 fprintf(fp, "\\n");
855 else if (c == '\r')
856 fprintf(fp, "\\r");
857 else if (c < ' ' || c >= 0x7f)
858 fprintf(fp, "\\x%02x", c & 0xff);
859 else
860 fputc(c, fp);
861 }
862 fputc(quote, fp);
863 Py_END_ALLOW_THREADS
864 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000865}
866
Christian Heimes44720832008-05-26 13:01:01 +0000867PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000868PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000869{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000870 register PyStringObject* op = (PyStringObject*) obj;
871 size_t newsize = 2 + 4 * Py_SIZE(op);
872 PyObject *v;
873 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
874 PyErr_SetString(PyExc_OverflowError,
875 "string is too large to make repr");
876 return NULL;
877 }
878 v = PyString_FromStringAndSize((char *)NULL, newsize);
879 if (v == NULL) {
880 return NULL;
881 }
882 else {
883 register Py_ssize_t i;
884 register char c;
885 register char *p;
886 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000887
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000888 /* figure out which quote to use; single is preferred */
889 quote = '\'';
890 if (smartquotes &&
891 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000895 p = PyString_AS_STRING(v);
896 *p++ = quote;
897 for (i = 0; i < Py_SIZE(op); i++) {
898 /* There's at least enough room for a hex escape
899 and a closing quote. */
900 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
901 c = op->ob_sval[i];
902 if (c == quote || c == '\\')
903 *p++ = '\\', *p++ = c;
904 else if (c == '\t')
905 *p++ = '\\', *p++ = 't';
906 else if (c == '\n')
907 *p++ = '\\', *p++ = 'n';
908 else if (c == '\r')
909 *p++ = '\\', *p++ = 'r';
910 else if (c < ' ' || c >= 0x7f) {
911 /* For performance, we don't want to call
912 PyOS_snprintf here (extra layers of
913 function call). */
914 sprintf(p, "\\x%02x", c & 0xff);
915 p += 4;
916 }
917 else
918 *p++ = c;
919 }
920 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
921 *p++ = quote;
922 *p = '\0';
923 _PyString_Resize(
924 &v, (p - PyString_AS_STRING(v)));
925 return v;
926 }
Christian Heimes44720832008-05-26 13:01:01 +0000927}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000928
929static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000930string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000931{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000932 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000933}
934
Christian Heimes1a6387e2008-03-26 12:49:49 +0000935static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000936string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000937{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000938 assert(PyString_Check(s));
939 if (PyString_CheckExact(s)) {
940 Py_INCREF(s);
941 return s;
942 }
943 else {
944 /* Subtype -- return genuine string with the same value. */
945 PyStringObject *t = (PyStringObject *) s;
946 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
947 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000948}
949
Christian Heimes44720832008-05-26 13:01:01 +0000950static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000951string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000952{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000953 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +0000954}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000955
Christian Heimes44720832008-05-26 13:01:01 +0000956static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000957string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000958{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000959 register Py_ssize_t size;
960 register PyStringObject *op;
961 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000962#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000963 if (PyUnicode_Check(bb))
964 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +0000965#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000966 if (PyByteArray_Check(bb))
967 return PyByteArray_Concat((PyObject *)a, bb);
968 PyErr_Format(PyExc_TypeError,
969 "cannot concatenate 'str' and '%.200s' objects",
970 Py_TYPE(bb)->tp_name);
971 return NULL;
972 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000973#define b ((PyStringObject *)bb)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000974 /* Optimize cases with empty left or right operand */
975 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
976 PyString_CheckExact(a) && PyString_CheckExact(b)) {
977 if (Py_SIZE(a) == 0) {
978 Py_INCREF(bb);
979 return bb;
980 }
981 Py_INCREF(a);
982 return (PyObject *)a;
983 }
984 size = Py_SIZE(a) + Py_SIZE(b);
985 /* Check that string sizes are not negative, to prevent an
986 overflow in cases where we are passed incorrectly-created
987 strings with negative lengths (due to a bug in other code).
988 */
989 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
990 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
991 PyErr_SetString(PyExc_OverflowError,
992 "strings are too large to concat");
993 return NULL;
994 }
995
996 /* Inline PyObject_NewVar */
997 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
998 PyErr_SetString(PyExc_OverflowError,
999 "strings are too large to concat");
1000 return NULL;
1001 }
1002 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
1003 if (op == NULL)
1004 return PyErr_NoMemory();
1005 PyObject_INIT_VAR(op, &PyString_Type, size);
1006 op->ob_shash = -1;
1007 op->ob_sstate = SSTATE_NOT_INTERNED;
1008 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1009 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1010 op->ob_sval[size] = '\0';
1011 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001012#undef b
1013}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001014
Christian Heimes44720832008-05-26 13:01:01 +00001015static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001016string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001017{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001018 register Py_ssize_t i;
1019 register Py_ssize_t j;
1020 register Py_ssize_t size;
1021 register PyStringObject *op;
1022 size_t nbytes;
1023 if (n < 0)
1024 n = 0;
1025 /* watch out for overflows: the size can overflow int,
1026 * and the # of bytes needed can overflow size_t
1027 */
1028 size = Py_SIZE(a) * n;
1029 if (n && size / n != Py_SIZE(a)) {
1030 PyErr_SetString(PyExc_OverflowError,
1031 "repeated string is too long");
1032 return NULL;
1033 }
1034 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 nbytes = (size_t)size;
1039 if (nbytes + sizeof(PyStringObject) <= nbytes) {
1040 PyErr_SetString(PyExc_OverflowError,
1041 "repeated string is too long");
1042 return NULL;
1043 }
1044 op = (PyStringObject *)
1045 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1046 if (op == NULL)
1047 return PyErr_NoMemory();
1048 PyObject_INIT_VAR(op, &PyString_Type, size);
1049 op->ob_shash = -1;
1050 op->ob_sstate = SSTATE_NOT_INTERNED;
1051 op->ob_sval[size] = '\0';
1052 if (Py_SIZE(a) == 1 && n > 0) {
1053 memset(op->ob_sval, a->ob_sval[0] , n);
1054 return (PyObject *) op;
1055 }
1056 i = 0;
1057 if (i < size) {
1058 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1059 i = Py_SIZE(a);
1060 }
1061 while (i < size) {
1062 j = (i <= size-i) ? i : size-i;
1063 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1064 i += j;
1065 }
1066 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1070
1071static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001072string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001073 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001074 /* j -- may be negative! */
1075{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001076 if (i < 0)
1077 i = 0;
1078 if (j < 0)
1079 j = 0; /* Avoid signed/unsigned bug in next line */
1080 if (j > Py_SIZE(a))
1081 j = Py_SIZE(a);
1082 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1083 /* It's the same as a */
1084 Py_INCREF(a);
1085 return (PyObject *)a;
1086 }
1087 if (j < i)
1088 j = i;
1089 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001090}
1091
1092static int
1093string_contains(PyObject *str_obj, PyObject *sub_obj)
1094{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001095 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001096#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001097 if (PyUnicode_Check(sub_obj))
1098 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001099#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001100 if (!PyString_Check(sub_obj)) {
1101 PyErr_Format(PyExc_TypeError,
1102 "'in <string>' requires string as left operand, "
1103 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1104 return -1;
1105 }
1106 }
Christian Heimes44720832008-05-26 13:01:01 +00001107
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001108 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001109}
1110
1111static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001112string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001113{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001114 char pchar;
1115 PyObject *v;
1116 if (i < 0 || i >= Py_SIZE(a)) {
1117 PyErr_SetString(PyExc_IndexError, "string index out of range");
1118 return NULL;
1119 }
1120 pchar = a->ob_sval[i];
1121 v = (PyObject *)characters[pchar & UCHAR_MAX];
1122 if (v == NULL)
1123 v = PyString_FromStringAndSize(&pchar, 1);
1124 else {
Christian Heimes44720832008-05-26 13:01:01 +00001125#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001126 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001127#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001128 Py_INCREF(v);
1129 }
1130 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001131}
1132
1133static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001134string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001135{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001136 int c;
1137 Py_ssize_t len_a, len_b;
1138 Py_ssize_t min_len;
1139 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001140
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001141 /* Make sure both arguments are strings. */
1142 if (!(PyString_Check(a) && PyString_Check(b))) {
1143 result = Py_NotImplemented;
1144 goto out;
1145 }
1146 if (a == b) {
1147 switch (op) {
1148 case Py_EQ:case Py_LE:case Py_GE:
1149 result = Py_True;
1150 goto out;
1151 case Py_NE:case Py_LT:case Py_GT:
1152 result = Py_False;
1153 goto out;
1154 }
1155 }
1156 if (op == Py_EQ) {
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (Py_SIZE(a) == Py_SIZE(b)
1160 && (a->ob_sval[0] == b->ob_sval[0]
1161 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1162 result = Py_True;
1163 } else {
1164 result = Py_False;
1165 }
1166 goto out;
1167 }
1168 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1169 min_len = (len_a < len_b) ? len_a : len_b;
1170 if (min_len > 0) {
1171 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1172 if (c==0)
1173 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1174 } else
1175 c = 0;
1176 if (c == 0)
1177 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1178 switch (op) {
1179 case Py_LT: c = c < 0; break;
1180 case Py_LE: c = c <= 0; break;
1181 case Py_EQ: assert(0); break; /* unreachable */
1182 case Py_NE: c = c != 0; break;
1183 case Py_GT: c = c > 0; break;
1184 case Py_GE: c = c >= 0; break;
1185 default:
1186 result = Py_NotImplemented;
1187 goto out;
1188 }
1189 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001190 out:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001191 Py_INCREF(result);
1192 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001193}
1194
1195int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001196_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001197{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001198 PyStringObject *a = (PyStringObject*) o1;
1199 PyStringObject *b = (PyStringObject*) o2;
1200 return Py_SIZE(a) == Py_SIZE(b)
1201 && *a->ob_sval == *b->ob_sval
1202 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001203}
1204
1205static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001206string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001207{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001208 register Py_ssize_t len;
1209 register unsigned char *p;
1210 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001211
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001212 if (a->ob_shash != -1)
1213 return a->ob_shash;
1214 len = Py_SIZE(a);
1215 p = (unsigned char *) a->ob_sval;
1216 x = *p << 7;
1217 while (--len >= 0)
1218 x = (1000003*x) ^ *p++;
1219 x ^= Py_SIZE(a);
1220 if (x == -1)
1221 x = -2;
1222 a->ob_shash = x;
1223 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001224}
1225
1226static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001227string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001228{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001229 if (PyIndex_Check(item)) {
1230 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1231 if (i == -1 && PyErr_Occurred())
1232 return NULL;
1233 if (i < 0)
1234 i += PyString_GET_SIZE(self);
1235 return string_item(self, i);
1236 }
1237 else if (PySlice_Check(item)) {
1238 Py_ssize_t start, stop, step, slicelength, cur, i;
1239 char* source_buf;
1240 char* result_buf;
1241 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001242
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001243 if (PySlice_GetIndicesEx((PySliceObject*)item,
1244 PyString_GET_SIZE(self),
1245 &start, &stop, &step, &slicelength) < 0) {
1246 return NULL;
1247 }
Christian Heimes44720832008-05-26 13:01:01 +00001248
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001249 if (slicelength <= 0) {
1250 return PyString_FromStringAndSize("", 0);
1251 }
1252 else if (start == 0 && step == 1 &&
1253 slicelength == PyString_GET_SIZE(self) &&
1254 PyString_CheckExact(self)) {
1255 Py_INCREF(self);
1256 return (PyObject *)self;
1257 }
1258 else if (step == 1) {
1259 return PyString_FromStringAndSize(
1260 PyString_AS_STRING(self) + start,
1261 slicelength);
1262 }
1263 else {
1264 source_buf = PyString_AsString((PyObject*)self);
1265 result_buf = (char *)PyMem_Malloc(slicelength);
1266 if (result_buf == NULL)
1267 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001268
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001269 for (cur = start, i = 0; i < slicelength;
1270 cur += step, i++) {
1271 result_buf[i] = source_buf[cur];
1272 }
Christian Heimes44720832008-05-26 13:01:01 +00001273
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001274 result = PyString_FromStringAndSize(result_buf,
1275 slicelength);
1276 PyMem_Free(result_buf);
1277 return result;
1278 }
1279 }
1280 else {
1281 PyErr_Format(PyExc_TypeError,
1282 "string indices must be integers, not %.200s",
1283 Py_TYPE(item)->tp_name);
1284 return NULL;
1285 }
Christian Heimes44720832008-05-26 13:01:01 +00001286}
1287
1288static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001289string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001290{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001291 if ( index != 0 ) {
1292 PyErr_SetString(PyExc_SystemError,
1293 "accessing non-existent string segment");
1294 return -1;
1295 }
1296 *ptr = (void *)self->ob_sval;
1297 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001298}
1299
1300static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001301string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001302{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001303 PyErr_SetString(PyExc_TypeError,
1304 "Cannot use string as modifiable buffer");
1305 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001306}
1307
1308static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001309string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001310{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001311 if ( lenp )
1312 *lenp = Py_SIZE(self);
1313 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001314}
1315
1316static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001317string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001318{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001319 if ( index != 0 ) {
1320 PyErr_SetString(PyExc_SystemError,
1321 "accessing non-existent string segment");
1322 return -1;
1323 }
1324 *ptr = self->ob_sval;
1325 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001326}
1327
1328static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001329string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001330{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001331 return PyBuffer_FillInfo(view, (PyObject*)self,
1332 (void *)self->ob_sval, Py_SIZE(self),
1333 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001334}
1335
/* Sequence-protocol slot table for the string type.  The two assignment
   slots are left as 0, i.e. no item or slice assignment handler is
   installed. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1346
/* Mapping-protocol slot table for the string type: a length function and
   a subscript function; the third slot is left as 0 (no handler). */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,
    (binaryfunc)string_subscript,
    0,
};
1352
/* Buffer-protocol slot table for the string type, wiring in the
   string_buffer_* functions defined above.  The final slot is left as 0
   (NOTE(review): presumably the release-buffer hook -- confirm against
   the PyBufferProcs declaration). */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
    (getbufferproc)string_buffer_getbuffer,
    0, /* XXX */
};
1361
1362
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001363
/* Selectors for the three strip variants; they double as indices into
   stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Each entry is an argument-format string of the form "|O:<method name>". */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Skips the three-character "|O:" prefix, leaving the bare method name. */
#define STRIPNAME(i) (stripformat[i]+3)
1372
Christian Heimes1a6387e2008-03-26 12:49:49 +00001373
/* True when the `length` bytes of `pattern` occur in `target` starting at
   `offset`.  The first and last bytes are compared directly and memcmp
   checks the bytes after the first.

   Don't call if length < 2: `length - 2` is passed to memcmp as the
   byte count and would become negative.

   Every argument is parenthesized so that expression arguments (for
   example a conditional expression) expand with the intended grouping. */
#define Py_STRING_MATCH(target, offset, pattern, length)            \
    ((target)[(offset)] == (pattern)[0] &&                          \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&      \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1379
1380
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements.  The result is capped at MAX_PREALLOC.
   The argument is parenthesized so that an expression argument such as
   `x & 3` is evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)
/* Create the substring data[left:right] and append it to `list`.
   Uses the caller's locals `str` and `list` and jumps to the caller's
   `onError` label when the substring cannot be created or appended.
   The body is wrapped in braces (as SPLIT_ADD is) so the macro expands
   to a single statement and stays intact under an unbraced if/for/while. */
#define SPLIT_APPEND(data, left, right) {                       \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str); }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001405
/* Create the substring data[left:right] and store it as result number
   `count`, then increment `count`.  While count < MAX_PREALLOC the item
   is placed directly with PyList_SET_ITEM (which takes over the
   reference); beyond that it is appended with PyList_Append and the
   local reference is dropped.  Uses the caller's locals `str`, `list`
   and `count`, and jumps to the caller's `onError` label on failure. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
/* Overwrites the list's size field with the caller's `count`. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
1425
/* Cursor helpers over the byte array `s`; each one modifies `i` in place.
   SKIP_SPACE / SKIP_NONSPACE advance `i` while it is below `len` and s[i]
   is (respectively is not) whitespace according to isspace().
   RSKIP_SPACE / RSKIP_NONSPACE move `i` backwards under the same tests
   and stop once `i` drops below 0. */
#define SKIP_SPACE(s, i, len)    { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }
#define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }
#define RSKIP_SPACE(s, i)        { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }
#define RSKIP_NONSPACE(s, i)     { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001430
1431Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001432split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001433{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001434 const char *s = PyString_AS_STRING(self);
1435 Py_ssize_t i, j, count=0;
1436 PyObject *str;
1437 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001438
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001439 if (list == NULL)
1440 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001442 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001443
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001444 while (maxsplit-- > 0) {
1445 SKIP_SPACE(s, i, len);
1446 if (i==len) break;
1447 j = i; i++;
1448 SKIP_NONSPACE(s, i, len);
1449 if (j == 0 && i == len && PyString_CheckExact(self)) {
1450 /* No whitespace in self, so just use it as list[0] */
1451 Py_INCREF(self);
1452 PyList_SET_ITEM(list, 0, (PyObject *)self);
1453 count++;
1454 break;
1455 }
1456 SPLIT_ADD(s, j, i);
1457 }
Christian Heimes44720832008-05-26 13:01:01 +00001458
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001459 if (i < len) {
1460 /* Only occurs when maxsplit was reached */
1461 /* Skip any remaining whitespace and copy to end of string */
1462 SKIP_SPACE(s, i, len);
1463 if (i != len)
1464 SPLIT_ADD(s, i, len);
1465 }
1466 FIX_PREALLOC_SIZE(list);
1467 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001468 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001469 Py_DECREF(list);
1470 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001471}
1472
Christian Heimes1a6387e2008-03-26 12:49:49 +00001473Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001474split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001475{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001476 const char *s = PyString_AS_STRING(self);
1477 register Py_ssize_t i, j, count=0;
1478 PyObject *str;
1479 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001480
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001481 if (list == NULL)
1482 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001483
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001484 i = j = 0;
1485 while ((j < len) && (maxcount-- > 0)) {
1486 for(; j<len; j++) {
1487 /* I found that using memchr makes no difference */
1488 if (s[j] == ch) {
1489 SPLIT_ADD(s, i, j);
1490 i = j = j + 1;
1491 break;
1492 }
1493 }
1494 }
1495 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1496 /* ch not in self, so just use self as list[0] */
1497 Py_INCREF(self);
1498 PyList_SET_ITEM(list, 0, (PyObject *)self);
1499 count++;
1500 }
1501 else if (i <= len) {
1502 SPLIT_ADD(s, i, len);
1503 }
1504 FIX_PREALLOC_SIZE(list);
1505 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001506
1507 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001508 Py_DECREF(list);
1509 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001510}
1511
1512PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001513"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001514\n\
Christian Heimes44720832008-05-26 13:01:01 +00001515Return a list of the words in the string S, using sep as the\n\
1516delimiter string. If maxsplit is given, at most maxsplit\n\
1517splits are done. If sep is not specified or is None, any\n\
1518whitespace string is a separator and empty strings are removed\n\
1519from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001520
1521static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001522string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001524 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1525 Py_ssize_t maxsplit = -1, count=0;
1526 const char *s = PyString_AS_STRING(self), *sub;
1527 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001528#ifdef USE_FAST
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001529 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001530#endif
1531
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001532 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1533 return NULL;
1534 if (maxsplit < 0)
1535 maxsplit = PY_SSIZE_T_MAX;
1536 if (subobj == Py_None)
1537 return split_whitespace(self, len, maxsplit);
1538 if (PyString_Check(subobj)) {
1539 sub = PyString_AS_STRING(subobj);
1540 n = PyString_GET_SIZE(subobj);
1541 }
Christian Heimes44720832008-05-26 13:01:01 +00001542#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001543 else if (PyUnicode_Check(subobj))
1544 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001545#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001546 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1547 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001548
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001549 if (n == 0) {
1550 PyErr_SetString(PyExc_ValueError, "empty separator");
1551 return NULL;
1552 }
1553 else if (n == 1)
1554 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001555
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001556 list = PyList_New(PREALLOC_SIZE(maxsplit));
1557 if (list == NULL)
1558 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001559
1560#ifdef USE_FAST
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001561 i = j = 0;
1562 while (maxsplit-- > 0) {
1563 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1564 if (pos < 0)
1565 break;
1566 j = i+pos;
1567 SPLIT_ADD(s, i, j);
1568 i = j + n;
1569 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001570#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001571 i = j = 0;
1572 while ((j+n <= len) && (maxsplit-- > 0)) {
1573 for (; j+n <= len; j++) {
1574 if (Py_STRING_MATCH(s, j, sub, n)) {
1575 SPLIT_ADD(s, i, j);
1576 i = j = j + n;
1577 break;
1578 }
1579 }
1580 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001581#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001582 SPLIT_ADD(s, i, len);
1583 FIX_PREALLOC_SIZE(list);
1584 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001585
Christian Heimes44720832008-05-26 13:01:01 +00001586 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001587 Py_DECREF(list);
1588 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001589}
1590
1591PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001592"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001593\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001594Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001595the separator itself, and the part after it. If the separator is not\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001596found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001597
1598static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001599string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001600{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001601 const char *sep;
1602 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001603
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001604 if (PyString_Check(sep_obj)) {
1605 sep = PyString_AS_STRING(sep_obj);
1606 sep_len = PyString_GET_SIZE(sep_obj);
1607 }
Christian Heimes44720832008-05-26 13:01:01 +00001608#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001609 else if (PyUnicode_Check(sep_obj))
1610 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001611#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001612 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1613 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001614
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001615 return stringlib_partition(
1616 (PyObject*) self,
1617 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1618 sep_obj, sep, sep_len
1619 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001620}
1621
1622PyDoc_STRVAR(rpartition__doc__,
Ezio Melottidabb5f72010-01-25 11:46:11 +00001623"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001624\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001625Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001626the part before it, the separator itself, and the part after it. If the\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001627separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001628
1629static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001630string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001631{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001632 const char *sep;
1633 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001634
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001635 if (PyString_Check(sep_obj)) {
1636 sep = PyString_AS_STRING(sep_obj);
1637 sep_len = PyString_GET_SIZE(sep_obj);
1638 }
Christian Heimes44720832008-05-26 13:01:01 +00001639#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001640 else if (PyUnicode_Check(sep_obj))
1641 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001642#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001643 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1644 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001645
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001646 return stringlib_rpartition(
1647 (PyObject*) self,
1648 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1649 sep_obj, sep, sep_len
1650 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001651}
1652
1653Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001654rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001655{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001656 const char *s = PyString_AS_STRING(self);
1657 Py_ssize_t i, j, count=0;
1658 PyObject *str;
1659 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001660
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001661 if (list == NULL)
1662 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001663
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001664 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001665
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001666 while (maxsplit-- > 0) {
1667 RSKIP_SPACE(s, i);
1668 if (i<0) break;
1669 j = i; i--;
1670 RSKIP_NONSPACE(s, i);
1671 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1672 /* No whitespace in self, so just use it as list[0] */
1673 Py_INCREF(self);
1674 PyList_SET_ITEM(list, 0, (PyObject *)self);
1675 count++;
1676 break;
1677 }
1678 SPLIT_ADD(s, i + 1, j + 1);
1679 }
1680 if (i >= 0) {
1681 /* Only occurs when maxsplit was reached */
1682 /* Skip any remaining whitespace and copy to beginning of string */
1683 RSKIP_SPACE(s, i);
1684 if (i >= 0)
1685 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001686
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001687 }
1688 FIX_PREALLOC_SIZE(list);
1689 if (PyList_Reverse(list) < 0)
1690 goto onError;
1691 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001692 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001693 Py_DECREF(list);
1694 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001695}
1696
1697Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001698rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001699{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001700 const char *s = PyString_AS_STRING(self);
1701 register Py_ssize_t i, j, count=0;
1702 PyObject *str;
1703 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001704
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001705 if (list == NULL)
1706 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001707
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001708 i = j = len - 1;
1709 while ((i >= 0) && (maxcount-- > 0)) {
1710 for (; i >= 0; i--) {
1711 if (s[i] == ch) {
1712 SPLIT_ADD(s, i + 1, j + 1);
1713 j = i = i - 1;
1714 break;
1715 }
1716 }
1717 }
1718 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1719 /* ch not in self, so just use self as list[0] */
1720 Py_INCREF(self);
1721 PyList_SET_ITEM(list, 0, (PyObject *)self);
1722 count++;
1723 }
1724 else if (j >= -1) {
1725 SPLIT_ADD(s, 0, j + 1);
1726 }
1727 FIX_PREALLOC_SIZE(list);
1728 if (PyList_Reverse(list) < 0)
1729 goto onError;
1730 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001731
Christian Heimes44720832008-05-26 13:01:01 +00001732 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001733 Py_DECREF(list);
1734 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001735}
1736
1737PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001738"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001739\n\
Christian Heimes44720832008-05-26 13:01:01 +00001740Return a list of the words in the string S, using sep as the\n\
1741delimiter string, starting at the end of the string and working\n\
1742to the front. If maxsplit is given, at most maxsplit splits are\n\
1743done. If sep is not specified or is None, any whitespace string\n\
1744is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001745
1746static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001747string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001748{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001749 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1750 Py_ssize_t maxsplit = -1, count=0;
1751 const char *s, *sub;
1752 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001753
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001754 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1755 return NULL;
1756 if (maxsplit < 0)
1757 maxsplit = PY_SSIZE_T_MAX;
1758 if (subobj == Py_None)
1759 return rsplit_whitespace(self, len, maxsplit);
1760 if (PyString_Check(subobj)) {
1761 sub = PyString_AS_STRING(subobj);
1762 n = PyString_GET_SIZE(subobj);
1763 }
Christian Heimes44720832008-05-26 13:01:01 +00001764#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001765 else if (PyUnicode_Check(subobj))
1766 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001767#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001768 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1769 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001770
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001771 if (n == 0) {
1772 PyErr_SetString(PyExc_ValueError, "empty separator");
1773 return NULL;
1774 }
1775 else if (n == 1)
1776 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001777
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001778 list = PyList_New(PREALLOC_SIZE(maxsplit));
1779 if (list == NULL)
1780 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001781
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001782 j = len;
1783 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001784
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001785 s = PyString_AS_STRING(self);
1786 while ( (i >= 0) && (maxsplit-- > 0) ) {
1787 for (; i>=0; i--) {
1788 if (Py_STRING_MATCH(s, i, sub, n)) {
1789 SPLIT_ADD(s, i + n, j);
1790 j = i;
1791 i -= n;
1792 break;
1793 }
1794 }
1795 }
1796 SPLIT_ADD(s, 0, j);
1797 FIX_PREALLOC_SIZE(list);
1798 if (PyList_Reverse(list) < 0)
1799 goto onError;
1800 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001801
1802onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001803 Py_DECREF(list);
1804 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001805}
1806
1807
1808PyDoc_STRVAR(join__doc__,
Georg Brandl5d2eb342009-10-27 15:08:27 +00001809"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001810\n\
1811Return a string which is the concatenation of the strings in the\n\
Georg Brandl5d2eb342009-10-27 15:08:27 +00001812iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001813
1814static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001815string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001816{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001817 char *sep = PyString_AS_STRING(self);
1818 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1819 PyObject *res = NULL;
1820 char *p;
1821 Py_ssize_t seqlen = 0;
1822 size_t sz = 0;
1823 Py_ssize_t i;
1824 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001825
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001826 seq = PySequence_Fast(orig, "");
1827 if (seq == NULL) {
1828 return NULL;
1829 }
Christian Heimes44720832008-05-26 13:01:01 +00001830
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001831 seqlen = PySequence_Size(seq);
1832 if (seqlen == 0) {
1833 Py_DECREF(seq);
1834 return PyString_FromString("");
1835 }
1836 if (seqlen == 1) {
1837 item = PySequence_Fast_GET_ITEM(seq, 0);
1838 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1839 Py_INCREF(item);
1840 Py_DECREF(seq);
1841 return item;
1842 }
1843 }
Christian Heimes44720832008-05-26 13:01:01 +00001844
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001845 /* There are at least two things to join, or else we have a subclass
1846 * of the builtin types in the sequence.
1847 * Do a pre-pass to figure out the total amount of space we'll
1848 * need (sz), see whether any argument is absurd, and defer to
1849 * the Unicode join if appropriate.
1850 */
1851 for (i = 0; i < seqlen; i++) {
1852 const size_t old_sz = sz;
1853 item = PySequence_Fast_GET_ITEM(seq, i);
1854 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001855#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001856 if (PyUnicode_Check(item)) {
1857 /* Defer to Unicode join.
1858 * CAUTION: There's no gurantee that the
1859 * original sequence can be iterated over
1860 * again, so we must pass seq here.
1861 */
1862 PyObject *result;
1863 result = PyUnicode_Join((PyObject *)self, seq);
1864 Py_DECREF(seq);
1865 return result;
1866 }
Christian Heimes44720832008-05-26 13:01:01 +00001867#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001868 PyErr_Format(PyExc_TypeError,
1869 "sequence item %zd: expected string,"
1870 " %.80s found",
1871 i, Py_TYPE(item)->tp_name);
1872 Py_DECREF(seq);
1873 return NULL;
1874 }
1875 sz += PyString_GET_SIZE(item);
1876 if (i != 0)
1877 sz += seplen;
1878 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1879 PyErr_SetString(PyExc_OverflowError,
1880 "join() result is too long for a Python string");
1881 Py_DECREF(seq);
1882 return NULL;
1883 }
1884 }
Christian Heimes44720832008-05-26 13:01:01 +00001885
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001886 /* Allocate result space. */
1887 res = PyString_FromStringAndSize((char*)NULL, sz);
1888 if (res == NULL) {
1889 Py_DECREF(seq);
1890 return NULL;
1891 }
Christian Heimes44720832008-05-26 13:01:01 +00001892
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001893 /* Catenate everything. */
1894 p = PyString_AS_STRING(res);
1895 for (i = 0; i < seqlen; ++i) {
1896 size_t n;
1897 item = PySequence_Fast_GET_ITEM(seq, i);
1898 n = PyString_GET_SIZE(item);
1899 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1900 p += n;
1901 if (i < seqlen - 1) {
1902 Py_MEMCPY(p, sep, seplen);
1903 p += seplen;
1904 }
1905 }
Christian Heimes44720832008-05-26 13:01:01 +00001906
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001907 Py_DECREF(seq);
1908 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001909}
1910
1911PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001912_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001913{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001914 assert(sep != NULL && PyString_Check(sep));
1915 assert(x != NULL);
1916 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001917}
1918
1919Py_LOCAL_INLINE(void)
1920string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1921{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001922 if (*end > len)
1923 *end = len;
1924 else if (*end < 0)
1925 *end += len;
1926 if (*end < 0)
1927 *end = 0;
1928 if (*start < 0)
1929 *start += len;
1930 if (*start < 0)
1931 *start = 0;
Christian Heimes44720832008-05-26 13:01:01 +00001932}
1933
1934Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001935string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001936{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001937 PyObject *subobj;
1938 const char *sub;
1939 Py_ssize_t sub_len;
1940 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1941 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes44720832008-05-26 13:01:01 +00001942
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001943 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1944 &obj_start, &obj_end))
1945 return -2;
1946 /* To support None in "start" and "end" arguments, meaning
1947 the same as if they were not passed.
1948 */
1949 if (obj_start != Py_None)
1950 if (!_PyEval_SliceIndex(obj_start, &start))
1951 return -2;
1952 if (obj_end != Py_None)
1953 if (!_PyEval_SliceIndex(obj_end, &end))
1954 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001955
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001956 if (PyString_Check(subobj)) {
1957 sub = PyString_AS_STRING(subobj);
1958 sub_len = PyString_GET_SIZE(subobj);
1959 }
Christian Heimes44720832008-05-26 13:01:01 +00001960#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001961 else if (PyUnicode_Check(subobj))
1962 return PyUnicode_Find(
1963 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001964#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001965 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1966 /* XXX - the "expected a character buffer object" is pretty
1967 confusing for a non-expert. remap to something else ? */
1968 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001969
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001970 if (dir > 0)
1971 return stringlib_find_slice(
1972 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1973 sub, sub_len, start, end);
1974 else
1975 return stringlib_rfind_slice(
1976 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1977 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001978}
1979
1980
1981PyDoc_STRVAR(find__doc__,
1982"S.find(sub [,start [,end]]) -> int\n\
1983\n\
1984Return the lowest index in S where substring sub is found,\n\
1985such that sub is contained within s[start:end]. Optional\n\
1986arguments start and end are interpreted as in slice notation.\n\
1987\n\
1988Return -1 on failure.");
1989
1990static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001991string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001992{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001993 Py_ssize_t result = string_find_internal(self, args, +1);
1994 if (result == -2)
1995 return NULL;
1996 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001997}
1998
1999
2000PyDoc_STRVAR(index__doc__,
2001"S.index(sub [,start [,end]]) -> int\n\
2002\n\
2003Like S.find() but raise ValueError when the substring is not found.");
2004
2005static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002006string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002007{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002008 Py_ssize_t result = string_find_internal(self, args, +1);
2009 if (result == -2)
2010 return NULL;
2011 if (result == -1) {
2012 PyErr_SetString(PyExc_ValueError,
2013 "substring not found");
2014 return NULL;
2015 }
2016 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002017}
2018
2019
2020PyDoc_STRVAR(rfind__doc__,
2021"S.rfind(sub [,start [,end]]) -> int\n\
2022\n\
2023Return the highest index in S where substring sub is found,\n\
2024such that sub is contained within s[start:end]. Optional\n\
2025arguments start and end are interpreted as in slice notation.\n\
2026\n\
2027Return -1 on failure.");
2028
2029static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002030string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002031{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002032 Py_ssize_t result = string_find_internal(self, args, -1);
2033 if (result == -2)
2034 return NULL;
2035 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002036}
2037
2038
2039PyDoc_STRVAR(rindex__doc__,
2040"S.rindex(sub [,start [,end]]) -> int\n\
2041\n\
2042Like S.rfind() but raise ValueError when the substring is not found.");
2043
2044static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002045string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002046{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002047 Py_ssize_t result = string_find_internal(self, args, -1);
2048 if (result == -2)
2049 return NULL;
2050 if (result == -1) {
2051 PyErr_SetString(PyExc_ValueError,
2052 "substring not found");
2053 return NULL;
2054 }
2055 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002056}
2057
2058
2059Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002060do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002061{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002062 char *s = PyString_AS_STRING(self);
2063 Py_ssize_t len = PyString_GET_SIZE(self);
2064 char *sep = PyString_AS_STRING(sepobj);
2065 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2066 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002067
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002068 i = 0;
2069 if (striptype != RIGHTSTRIP) {
2070 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2071 i++;
2072 }
2073 }
Christian Heimes44720832008-05-26 13:01:01 +00002074
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002075 j = len;
2076 if (striptype != LEFTSTRIP) {
2077 do {
2078 j--;
2079 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2080 j++;
2081 }
Christian Heimes44720832008-05-26 13:01:01 +00002082
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002083 if (i == 0 && j == len && PyString_CheckExact(self)) {
2084 Py_INCREF(self);
2085 return (PyObject*)self;
2086 }
2087 else
2088 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002089}
2090
2091
2092Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002093do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002094{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002095 char *s = PyString_AS_STRING(self);
2096 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002097
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002098 i = 0;
2099 if (striptype != RIGHTSTRIP) {
2100 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2101 i++;
2102 }
2103 }
Christian Heimes44720832008-05-26 13:01:01 +00002104
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002105 j = len;
2106 if (striptype != LEFTSTRIP) {
2107 do {
2108 j--;
2109 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2110 j++;
2111 }
Christian Heimes44720832008-05-26 13:01:01 +00002112
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002113 if (i == 0 && j == len && PyString_CheckExact(self)) {
2114 Py_INCREF(self);
2115 return (PyObject*)self;
2116 }
2117 else
2118 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002119}
2120
2121
2122Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002123do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002124{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002125 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002126
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002127 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2128 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002129
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002130 if (sep != NULL && sep != Py_None) {
2131 if (PyString_Check(sep))
2132 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00002133#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002134 else if (PyUnicode_Check(sep)) {
2135 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2136 PyObject *res;
2137 if (uniself==NULL)
2138 return NULL;
2139 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2140 striptype, sep);
2141 Py_DECREF(uniself);
2142 return res;
2143 }
Christian Heimes44720832008-05-26 13:01:01 +00002144#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002145 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00002146#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002147 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00002148#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002149 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00002150#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002151 STRIPNAME(striptype));
2152 return NULL;
2153 }
Christian Heimes44720832008-05-26 13:01:01 +00002154
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002155 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00002156}
2157
2158
2159PyDoc_STRVAR(strip__doc__,
2160"S.strip([chars]) -> string or unicode\n\
2161\n\
2162Return a copy of the string S with leading and trailing\n\
2163whitespace removed.\n\
2164If chars is given and not None, remove characters in chars instead.\n\
2165If chars is unicode, S will be converted to unicode before stripping");
2166
2167static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002168string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002169{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002170 if (PyTuple_GET_SIZE(args) == 0)
2171 return do_strip(self, BOTHSTRIP); /* Common case */
2172 else
2173 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002174}
2175
2176
2177PyDoc_STRVAR(lstrip__doc__,
2178"S.lstrip([chars]) -> string or unicode\n\
2179\n\
2180Return a copy of the string S with leading whitespace removed.\n\
2181If chars is given and not None, remove characters in chars instead.\n\
2182If chars is unicode, S will be converted to unicode before stripping");
2183
2184static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002185string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002186{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002187 if (PyTuple_GET_SIZE(args) == 0)
2188 return do_strip(self, LEFTSTRIP); /* Common case */
2189 else
2190 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002191}
2192
2193
2194PyDoc_STRVAR(rstrip__doc__,
2195"S.rstrip([chars]) -> string or unicode\n\
2196\n\
2197Return a copy of the string S with trailing whitespace removed.\n\
2198If chars is given and not None, remove characters in chars instead.\n\
2199If chars is unicode, S will be converted to unicode before stripping");
2200
2201static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002202string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002203{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002204 if (PyTuple_GET_SIZE(args) == 0)
2205 return do_strip(self, RIGHTSTRIP); /* Common case */
2206 else
2207 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002208}
2209
2210
2211PyDoc_STRVAR(lower__doc__,
2212"S.lower() -> string\n\
2213\n\
2214Return a copy of the string S converted to lowercase.");
2215
2216/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2217#ifndef _tolower
2218#define _tolower tolower
2219#endif
2220
2221static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002222string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002223{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002224 char *s;
2225 Py_ssize_t i, n = PyString_GET_SIZE(self);
2226 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002227
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002228 newobj = PyString_FromStringAndSize(NULL, n);
2229 if (!newobj)
2230 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002231
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002232 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002233
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002234 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002235
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002236 for (i = 0; i < n; i++) {
2237 int c = Py_CHARMASK(s[i]);
2238 if (isupper(c))
2239 s[i] = _tolower(c);
2240 }
Christian Heimes44720832008-05-26 13:01:01 +00002241
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002242 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002243}
2244
2245PyDoc_STRVAR(upper__doc__,
2246"S.upper() -> string\n\
2247\n\
2248Return a copy of the string S converted to uppercase.");
2249
2250#ifndef _toupper
2251#define _toupper toupper
2252#endif
2253
2254static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002255string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002256{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002257 char *s;
2258 Py_ssize_t i, n = PyString_GET_SIZE(self);
2259 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002260
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002261 newobj = PyString_FromStringAndSize(NULL, n);
2262 if (!newobj)
2263 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002264
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002265 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002266
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002267 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002268
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002269 for (i = 0; i < n; i++) {
2270 int c = Py_CHARMASK(s[i]);
2271 if (islower(c))
2272 s[i] = _toupper(c);
2273 }
Christian Heimes44720832008-05-26 13:01:01 +00002274
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002275 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002276}
2277
2278PyDoc_STRVAR(title__doc__,
2279"S.title() -> string\n\
2280\n\
2281Return a titlecased version of S, i.e. words start with uppercase\n\
2282characters, all remaining cased characters have lowercase.");
2283
2284static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002285string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002286{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002287 char *s = PyString_AS_STRING(self), *s_new;
2288 Py_ssize_t i, n = PyString_GET_SIZE(self);
2289 int previous_is_cased = 0;
2290 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002291
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002292 newobj = PyString_FromStringAndSize(NULL, n);
2293 if (newobj == NULL)
2294 return NULL;
2295 s_new = PyString_AsString(newobj);
2296 for (i = 0; i < n; i++) {
2297 int c = Py_CHARMASK(*s++);
2298 if (islower(c)) {
2299 if (!previous_is_cased)
2300 c = toupper(c);
2301 previous_is_cased = 1;
2302 } else if (isupper(c)) {
2303 if (previous_is_cased)
2304 c = tolower(c);
2305 previous_is_cased = 1;
2306 } else
2307 previous_is_cased = 0;
2308 *s_new++ = c;
2309 }
2310 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002311}
2312
2313PyDoc_STRVAR(capitalize__doc__,
2314"S.capitalize() -> string\n\
2315\n\
2316Return a copy of the string S with only its first character\n\
2317capitalized.");
2318
2319static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002320string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002321{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002322 char *s = PyString_AS_STRING(self), *s_new;
2323 Py_ssize_t i, n = PyString_GET_SIZE(self);
2324 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002325
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002326 newobj = PyString_FromStringAndSize(NULL, n);
2327 if (newobj == NULL)
2328 return NULL;
2329 s_new = PyString_AsString(newobj);
2330 if (0 < n) {
2331 int c = Py_CHARMASK(*s++);
2332 if (islower(c))
2333 *s_new = toupper(c);
2334 else
2335 *s_new = c;
2336 s_new++;
2337 }
2338 for (i = 1; i < n; i++) {
2339 int c = Py_CHARMASK(*s++);
2340 if (isupper(c))
2341 *s_new = tolower(c);
2342 else
2343 *s_new = c;
2344 s_new++;
2345 }
2346 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002347}
2348
2349
2350PyDoc_STRVAR(count__doc__,
2351"S.count(sub[, start[, end]]) -> int\n\
2352\n\
2353Return the number of non-overlapping occurrences of substring sub in\n\
2354string S[start:end]. Optional arguments start and end are interpreted\n\
2355as in slice notation.");
2356
2357static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002358string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002359{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002360 PyObject *sub_obj;
2361 const char *str = PyString_AS_STRING(self), *sub;
2362 Py_ssize_t sub_len;
2363 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002364
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002365 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2366 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2367 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002368
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002369 if (PyString_Check(sub_obj)) {
2370 sub = PyString_AS_STRING(sub_obj);
2371 sub_len = PyString_GET_SIZE(sub_obj);
2372 }
Christian Heimes44720832008-05-26 13:01:01 +00002373#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002374 else if (PyUnicode_Check(sub_obj)) {
2375 Py_ssize_t count;
2376 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2377 if (count == -1)
2378 return NULL;
2379 else
2380 return PyInt_FromSsize_t(count);
2381 }
Christian Heimes44720832008-05-26 13:01:01 +00002382#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002383 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2384 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002385
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002386 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002387
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002388 return PyInt_FromSsize_t(
2389 stringlib_count(str + start, end - start, sub, sub_len)
2390 );
Christian Heimes44720832008-05-26 13:01:01 +00002391}
2392
2393PyDoc_STRVAR(swapcase__doc__,
2394"S.swapcase() -> string\n\
2395\n\
2396Return a copy of the string S with uppercase characters\n\
2397converted to lowercase and vice versa.");
2398
2399static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002400string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002401{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002402 char *s = PyString_AS_STRING(self), *s_new;
2403 Py_ssize_t i, n = PyString_GET_SIZE(self);
2404 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002405
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002406 newobj = PyString_FromStringAndSize(NULL, n);
2407 if (newobj == NULL)
2408 return NULL;
2409 s_new = PyString_AsString(newobj);
2410 for (i = 0; i < n; i++) {
2411 int c = Py_CHARMASK(*s++);
2412 if (islower(c)) {
2413 *s_new = toupper(c);
2414 }
2415 else if (isupper(c)) {
2416 *s_new = tolower(c);
2417 }
2418 else
2419 *s_new = c;
2420 s_new++;
2421 }
2422 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002423}
2424
2425
2426PyDoc_STRVAR(translate__doc__,
2427"S.translate(table [,deletechars]) -> string\n\
2428\n\
2429Return a copy of the string S, where all characters occurring\n\
2430in the optional argument deletechars are removed, and the\n\
2431remaining characters have been mapped through the given\n\
2432translation table, which must be a string of length 256.");
2433
2434static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002435string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002436{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002437 register char *input, *output;
2438 const char *table;
2439 register Py_ssize_t i, c, changed = 0;
2440 PyObject *input_obj = (PyObject*)self;
2441 const char *output_start, *del_table=NULL;
2442 Py_ssize_t inlen, tablen, dellen = 0;
2443 PyObject *result;
2444 int trans_table[256];
2445 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002446
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002447 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2448 &tableobj, &delobj))
2449 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002450
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002451 if (PyString_Check(tableobj)) {
2452 table = PyString_AS_STRING(tableobj);
2453 tablen = PyString_GET_SIZE(tableobj);
2454 }
2455 else if (tableobj == Py_None) {
2456 table = NULL;
2457 tablen = 256;
2458 }
Christian Heimes44720832008-05-26 13:01:01 +00002459#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002460 else if (PyUnicode_Check(tableobj)) {
2461 /* Unicode .translate() does not support the deletechars
2462 parameter; instead a mapping to None will cause characters
2463 to be deleted. */
2464 if (delobj != NULL) {
2465 PyErr_SetString(PyExc_TypeError,
2466 "deletions are implemented differently for unicode");
2467 return NULL;
2468 }
2469 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2470 }
Christian Heimes44720832008-05-26 13:01:01 +00002471#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002472 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2473 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002474
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002475 if (tablen != 256) {
2476 PyErr_SetString(PyExc_ValueError,
2477 "translation table must be 256 characters long");
2478 return NULL;
2479 }
Christian Heimes44720832008-05-26 13:01:01 +00002480
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002481 if (delobj != NULL) {
2482 if (PyString_Check(delobj)) {
2483 del_table = PyString_AS_STRING(delobj);
2484 dellen = PyString_GET_SIZE(delobj);
2485 }
Christian Heimes44720832008-05-26 13:01:01 +00002486#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002487 else if (PyUnicode_Check(delobj)) {
2488 PyErr_SetString(PyExc_TypeError,
2489 "deletions are implemented differently for unicode");
2490 return NULL;
2491 }
Christian Heimes44720832008-05-26 13:01:01 +00002492#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002493 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2494 return NULL;
2495 }
2496 else {
2497 del_table = NULL;
2498 dellen = 0;
2499 }
Christian Heimes44720832008-05-26 13:01:01 +00002500
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002501 inlen = PyString_GET_SIZE(input_obj);
2502 result = PyString_FromStringAndSize((char *)NULL, inlen);
2503 if (result == NULL)
2504 return NULL;
2505 output_start = output = PyString_AsString(result);
2506 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002507
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002508 if (dellen == 0 && table != NULL) {
2509 /* If no deletions are required, use faster code */
2510 for (i = inlen; --i >= 0; ) {
2511 c = Py_CHARMASK(*input++);
2512 if (Py_CHARMASK((*output++ = table[c])) != c)
2513 changed = 1;
2514 }
2515 if (changed || !PyString_CheckExact(input_obj))
2516 return result;
2517 Py_DECREF(result);
2518 Py_INCREF(input_obj);
2519 return input_obj;
2520 }
Christian Heimes44720832008-05-26 13:01:01 +00002521
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002522 if (table == NULL) {
2523 for (i = 0; i < 256; i++)
2524 trans_table[i] = Py_CHARMASK(i);
2525 } else {
2526 for (i = 0; i < 256; i++)
2527 trans_table[i] = Py_CHARMASK(table[i]);
2528 }
Christian Heimes44720832008-05-26 13:01:01 +00002529
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002530 for (i = 0; i < dellen; i++)
2531 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002532
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002533 for (i = inlen; --i >= 0; ) {
2534 c = Py_CHARMASK(*input++);
2535 if (trans_table[c] != -1)
2536 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2537 continue;
2538 changed = 1;
2539 }
2540 if (!changed && PyString_CheckExact(input_obj)) {
2541 Py_DECREF(result);
2542 Py_INCREF(input_obj);
2543 return input_obj;
2544 }
2545 /* Fix the size of the resulting string */
2546 if (inlen > 0)
2547 _PyString_Resize(&result, output - output_start);
2548 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002549}
2550
2551
/* Search directions accepted by findstring() and countstring().
   REVERSE is parenthesized so it stays a single operand wherever the
   macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL when there is none.  Every
   argument is parenthesized so that arbitrary expressions can be passed
   safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2559
2560/* String ops must return a string. */
2561/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002562Py_LOCAL(PyStringObject *)
2563return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002564{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002565 if (PyString_CheckExact(self)) {
2566 Py_INCREF(self);
2567 return self;
2568 }
2569 return (PyStringObject *)PyString_FromStringAndSize(
2570 PyString_AS_STRING(self),
2571 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002572}
2573
2574Py_LOCAL_INLINE(Py_ssize_t)
2575countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2576{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002577 Py_ssize_t count=0;
2578 const char *start=target;
2579 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002580
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002581 while ( (start=findchar(start, end-start, c)) != NULL ) {
2582 count++;
2583 if (count >= maxcount)
2584 break;
2585 start += 1;
2586 }
2587 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002588}
2589
2590Py_LOCAL(Py_ssize_t)
2591findstring(const char *target, Py_ssize_t target_len,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002592 const char *pattern, Py_ssize_t pattern_len,
2593 Py_ssize_t start,
2594 Py_ssize_t end,
2595 int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002596{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002597 if (start < 0) {
2598 start += target_len;
2599 if (start < 0)
2600 start = 0;
2601 }
2602 if (end > target_len) {
2603 end = target_len;
2604 } else if (end < 0) {
2605 end += target_len;
2606 if (end < 0)
2607 end = 0;
2608 }
Christian Heimes44720832008-05-26 13:01:01 +00002609
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002610 /* zero-length substrings always match at the first attempt */
2611 if (pattern_len == 0)
2612 return (direction > 0) ? start : end;
Christian Heimes44720832008-05-26 13:01:01 +00002613
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002614 end -= pattern_len;
Christian Heimes44720832008-05-26 13:01:01 +00002615
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002616 if (direction < 0) {
2617 for (; end >= start; end--)
2618 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2619 return end;
2620 } else {
2621 for (; start <= end; start++)
2622 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2623 return start;
2624 }
2625 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00002626}
2627
2628Py_LOCAL_INLINE(Py_ssize_t)
2629countstring(const char *target, Py_ssize_t target_len,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002630 const char *pattern, Py_ssize_t pattern_len,
2631 Py_ssize_t start,
2632 Py_ssize_t end,
2633 int direction, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002634{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002635 Py_ssize_t count=0;
Christian Heimes44720832008-05-26 13:01:01 +00002636
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002637 if (start < 0) {
2638 start += target_len;
2639 if (start < 0)
2640 start = 0;
2641 }
2642 if (end > target_len) {
2643 end = target_len;
2644 } else if (end < 0) {
2645 end += target_len;
2646 if (end < 0)
2647 end = 0;
2648 }
Christian Heimes44720832008-05-26 13:01:01 +00002649
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002650 /* zero-length substrings match everywhere */
2651 if (pattern_len == 0 || maxcount == 0) {
2652 if (target_len+1 < maxcount)
2653 return target_len+1;
2654 return maxcount;
2655 }
Christian Heimes44720832008-05-26 13:01:01 +00002656
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002657 end -= pattern_len;
2658 if (direction < 0) {
2659 for (; (end >= start); end--)
2660 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2661 count++;
2662 if (--maxcount <= 0) break;
2663 end -= pattern_len-1;
2664 }
2665 } else {
2666 for (; (start <= end); start++)
2667 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2668 count++;
2669 if (--maxcount <= 0)
2670 break;
2671 start += pattern_len-1;
2672 }
2673 }
2674 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002675}
2676
2677
2678/* Algorithms for different cases of string replacement */
2679
2680/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002681Py_LOCAL(PyStringObject *)
2682replace_interleave(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002683 const char *to_s, Py_ssize_t to_len,
2684 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002685{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002686 char *self_s, *result_s;
2687 Py_ssize_t self_len, result_len;
2688 Py_ssize_t count, i, product;
2689 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002690
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002691 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002692
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002693 /* 1 at the end plus 1 after every character */
2694 count = self_len+1;
2695 if (maxcount < count)
2696 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002697
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002698 /* Check for overflow */
2699 /* result_len = count * to_len + self_len; */
2700 product = count * to_len;
2701 if (product / to_len != count) {
2702 PyErr_SetString(PyExc_OverflowError,
2703 "replace string is too long");
2704 return NULL;
2705 }
2706 result_len = product + self_len;
2707 if (result_len < 0) {
2708 PyErr_SetString(PyExc_OverflowError,
2709 "replace string is too long");
2710 return NULL;
2711 }
Christian Heimes44720832008-05-26 13:01:01 +00002712
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002713 if (! (result = (PyStringObject *)
2714 PyString_FromStringAndSize(NULL, result_len)) )
2715 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002716
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002717 self_s = PyString_AS_STRING(self);
2718 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002719
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002720 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002721
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002722 /* Lay the first one down (guaranteed this will occur) */
2723 Py_MEMCPY(result_s, to_s, to_len);
2724 result_s += to_len;
2725 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002726
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002727 for (i=0; i<count; i++) {
2728 *result_s++ = *self_s++;
2729 Py_MEMCPY(result_s, to_s, to_len);
2730 result_s += to_len;
2731 }
2732
2733 /* Copy the rest of the original string */
2734 Py_MEMCPY(result_s, self_s, self_len-i);
2735
2736 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002737}
2738
2739/* Special case for deleting a single character */
2740/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002741Py_LOCAL(PyStringObject *)
2742replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002743 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002744{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002745 char *self_s, *result_s;
2746 char *start, *next, *end;
2747 Py_ssize_t self_len, result_len;
2748 Py_ssize_t count;
2749 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002750
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002751 self_len = PyString_GET_SIZE(self);
2752 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002753
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002754 count = countchar(self_s, self_len, from_c, maxcount);
2755 if (count == 0) {
2756 return return_self(self);
2757 }
Christian Heimes44720832008-05-26 13:01:01 +00002758
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002759 result_len = self_len - count; /* from_len == 1 */
2760 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002761
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002762 if ( (result = (PyStringObject *)
2763 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2764 return NULL;
2765 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002766
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002767 start = self_s;
2768 end = self_s + self_len;
2769 while (count-- > 0) {
2770 next = findchar(start, end-start, from_c);
2771 if (next == NULL)
2772 break;
2773 Py_MEMCPY(result_s, start, next-start);
2774 result_s += (next-start);
2775 start = next+1;
2776 }
2777 Py_MEMCPY(result_s, start, end-start);
2778
2779 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002780}
2781
2782/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2783
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002784Py_LOCAL(PyStringObject *)
2785replace_delete_substring(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002786 const char *from_s, Py_ssize_t from_len,
2787 Py_ssize_t maxcount) {
2788 char *self_s, *result_s;
2789 char *start, *next, *end;
2790 Py_ssize_t self_len, result_len;
2791 Py_ssize_t count, offset;
2792 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002793
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002794 self_len = PyString_GET_SIZE(self);
2795 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002796
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002797 count = countstring(self_s, self_len,
2798 from_s, from_len,
2799 0, self_len, 1,
2800 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002801
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002802 if (count == 0) {
2803 /* no matches */
2804 return return_self(self);
2805 }
Christian Heimes44720832008-05-26 13:01:01 +00002806
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002807 result_len = self_len - (count * from_len);
2808 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002809
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002810 if ( (result = (PyStringObject *)
2811 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2812 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002813
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002814 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002815
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002816 start = self_s;
2817 end = self_s + self_len;
2818 while (count-- > 0) {
2819 offset = findstring(start, end-start,
2820 from_s, from_len,
2821 0, end-start, FORWARD);
2822 if (offset == -1)
2823 break;
2824 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002825
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002826 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002827
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002828 result_s += (next-start);
2829 start = next+from_len;
2830 }
2831 Py_MEMCPY(result_s, start, end-start);
2832 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002833}
2834
2835/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002836Py_LOCAL(PyStringObject *)
2837replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002838 char from_c, char to_c,
2839 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002840{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002841 char *self_s, *result_s, *start, *end, *next;
2842 Py_ssize_t self_len;
2843 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002844
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002845 /* The result string will be the same size */
2846 self_s = PyString_AS_STRING(self);
2847 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002848
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002849 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002850
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002851 if (next == NULL) {
2852 /* No matches; return the original string */
2853 return return_self(self);
2854 }
Christian Heimes44720832008-05-26 13:01:01 +00002855
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002856 /* Need to make a new string */
2857 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2858 if (result == NULL)
2859 return NULL;
2860 result_s = PyString_AS_STRING(result);
2861 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002862
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002863 /* change everything in-place, starting with this one */
2864 start = result_s + (next-self_s);
2865 *start = to_c;
2866 start++;
2867 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002868
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002869 while (--maxcount > 0) {
2870 next = findchar(start, end-start, from_c);
2871 if (next == NULL)
2872 break;
2873 *next = to_c;
2874 start = next+1;
2875 }
Christian Heimes44720832008-05-26 13:01:01 +00002876
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002877 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002878}
2879
2880/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002881Py_LOCAL(PyStringObject *)
2882replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002883 const char *from_s, Py_ssize_t from_len,
2884 const char *to_s, Py_ssize_t to_len,
2885 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002886{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002887 char *result_s, *start, *end;
2888 char *self_s;
2889 Py_ssize_t self_len, offset;
2890 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002891
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002892 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002893
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002894 self_s = PyString_AS_STRING(self);
2895 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002896
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002897 offset = findstring(self_s, self_len,
2898 from_s, from_len,
2899 0, self_len, FORWARD);
2900 if (offset == -1) {
2901 /* No matches; return the original string */
2902 return return_self(self);
2903 }
Christian Heimes44720832008-05-26 13:01:01 +00002904
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002905 /* Need to make a new string */
2906 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2907 if (result == NULL)
2908 return NULL;
2909 result_s = PyString_AS_STRING(result);
2910 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002911
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002912 /* change everything in-place, starting with this one */
2913 start = result_s + offset;
2914 Py_MEMCPY(start, to_s, from_len);
2915 start += from_len;
2916 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002917
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002918 while ( --maxcount > 0) {
2919 offset = findstring(start, end-start,
2920 from_s, from_len,
2921 0, end-start, FORWARD);
2922 if (offset==-1)
2923 break;
2924 Py_MEMCPY(start+offset, to_s, from_len);
2925 start += offset+from_len;
2926 }
Christian Heimes44720832008-05-26 13:01:01 +00002927
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002928 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002929}
2930
2931/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002932Py_LOCAL(PyStringObject *)
2933replace_single_character(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002934 char from_c,
2935 const char *to_s, Py_ssize_t to_len,
2936 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002937{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002938 char *self_s, *result_s;
2939 char *start, *next, *end;
2940 Py_ssize_t self_len, result_len;
2941 Py_ssize_t count, product;
2942 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002943
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002944 self_s = PyString_AS_STRING(self);
2945 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002946
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002947 count = countchar(self_s, self_len, from_c, maxcount);
2948 if (count == 0) {
2949 /* no matches, return unchanged */
2950 return return_self(self);
2951 }
Christian Heimes44720832008-05-26 13:01:01 +00002952
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002953 /* use the difference between current and new, hence the "-1" */
2954 /* result_len = self_len + count * (to_len-1) */
2955 product = count * (to_len-1);
2956 if (product / (to_len-1) != count) {
2957 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2958 return NULL;
2959 }
2960 result_len = self_len + product;
2961 if (result_len < 0) {
2962 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2963 return NULL;
2964 }
Christian Heimes44720832008-05-26 13:01:01 +00002965
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002966 if ( (result = (PyStringObject *)
2967 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2968 return NULL;
2969 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002970
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002971 start = self_s;
2972 end = self_s + self_len;
2973 while (count-- > 0) {
2974 next = findchar(start, end-start, from_c);
2975 if (next == NULL)
2976 break;
Christian Heimes44720832008-05-26 13:01:01 +00002977
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002978 if (next == start) {
2979 /* replace with the 'to' */
2980 Py_MEMCPY(result_s, to_s, to_len);
2981 result_s += to_len;
2982 start += 1;
2983 } else {
2984 /* copy the unchanged old then the 'to' */
2985 Py_MEMCPY(result_s, start, next-start);
2986 result_s += (next-start);
2987 Py_MEMCPY(result_s, to_s, to_len);
2988 result_s += to_len;
2989 start = next+1;
2990 }
2991 }
2992 /* Copy the remainder of the remaining string */
2993 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002994
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002995 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002996}
2997
2998/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002999Py_LOCAL(PyStringObject *)
3000replace_substring(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003001 const char *from_s, Py_ssize_t from_len,
3002 const char *to_s, Py_ssize_t to_len,
3003 Py_ssize_t maxcount) {
3004 char *self_s, *result_s;
3005 char *start, *next, *end;
3006 Py_ssize_t self_len, result_len;
3007 Py_ssize_t count, offset, product;
3008 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003009
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003010 self_s = PyString_AS_STRING(self);
3011 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003012
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003013 count = countstring(self_s, self_len,
3014 from_s, from_len,
3015 0, self_len, FORWARD, maxcount);
3016 if (count == 0) {
3017 /* no matches, return unchanged */
3018 return return_self(self);
3019 }
Christian Heimes44720832008-05-26 13:01:01 +00003020
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003021 /* Check for overflow */
3022 /* result_len = self_len + count * (to_len-from_len) */
3023 product = count * (to_len-from_len);
3024 if (product / (to_len-from_len) != count) {
3025 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3026 return NULL;
3027 }
3028 result_len = self_len + product;
3029 if (result_len < 0) {
3030 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3031 return NULL;
3032 }
Christian Heimes44720832008-05-26 13:01:01 +00003033
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003034 if ( (result = (PyStringObject *)
3035 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3036 return NULL;
3037 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003038
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003039 start = self_s;
3040 end = self_s + self_len;
3041 while (count-- > 0) {
3042 offset = findstring(start, end-start,
3043 from_s, from_len,
3044 0, end-start, FORWARD);
3045 if (offset == -1)
3046 break;
3047 next = start+offset;
3048 if (next == start) {
3049 /* replace with the 'to' */
3050 Py_MEMCPY(result_s, to_s, to_len);
3051 result_s += to_len;
3052 start += from_len;
3053 } else {
3054 /* copy the unchanged old then the 'to' */
3055 Py_MEMCPY(result_s, start, next-start);
3056 result_s += (next-start);
3057 Py_MEMCPY(result_s, to_s, to_len);
3058 result_s += to_len;
3059 start = next+from_len;
3060 }
3061 }
3062 /* Copy the remainder of the remaining string */
3063 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00003064
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003065 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003066}
3067
3068
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003069Py_LOCAL(PyStringObject *)
3070replace(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003071 const char *from_s, Py_ssize_t from_len,
3072 const char *to_s, Py_ssize_t to_len,
3073 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00003074{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003075 if (maxcount < 0) {
3076 maxcount = PY_SSIZE_T_MAX;
3077 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3078 /* nothing to do; return the original string */
3079 return return_self(self);
3080 }
Christian Heimes44720832008-05-26 13:01:01 +00003081
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003082 if (maxcount == 0 ||
3083 (from_len == 0 && to_len == 0)) {
3084 /* nothing to do; return the original string */
3085 return return_self(self);
3086 }
Christian Heimes44720832008-05-26 13:01:01 +00003087
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003088 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00003089
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003090 if (from_len == 0) {
3091 /* insert the 'to' string everywhere. */
3092 /* >>> "Python".replace("", ".") */
3093 /* '.P.y.t.h.o.n.' */
3094 return replace_interleave(self, to_s, to_len, maxcount);
3095 }
Christian Heimes44720832008-05-26 13:01:01 +00003096
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003097 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3098 /* point for an empty self string to generate a non-empty string */
3099 /* Special case so the remaining code always gets a non-empty string */
3100 if (PyString_GET_SIZE(self) == 0) {
3101 return return_self(self);
3102 }
Christian Heimes44720832008-05-26 13:01:01 +00003103
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003104 if (to_len == 0) {
3105 /* delete all occurances of 'from' string */
3106 if (from_len == 1) {
3107 return replace_delete_single_character(
3108 self, from_s[0], maxcount);
3109 } else {
3110 return replace_delete_substring(self, from_s, from_len, maxcount);
3111 }
3112 }
Christian Heimes44720832008-05-26 13:01:01 +00003113
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003114 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00003115
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003116 if (from_len == to_len) {
3117 if (from_len == 1) {
3118 return replace_single_character_in_place(
3119 self,
3120 from_s[0],
3121 to_s[0],
3122 maxcount);
3123 } else {
3124 return replace_substring_in_place(
3125 self, from_s, from_len, to_s, to_len, maxcount);
3126 }
3127 }
Christian Heimes44720832008-05-26 13:01:01 +00003128
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003129 /* Otherwise use the more generic algorithms */
3130 if (from_len == 1) {
3131 return replace_single_character(self, from_s[0],
3132 to_s, to_len, maxcount);
3133 } else {
3134 /* len('from')>=2, len('to')>=1 */
3135 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3136 }
Christian Heimes44720832008-05-26 13:01:01 +00003137}
3138
3139PyDoc_STRVAR(replace__doc__,
Ezio Melotti6327bf12010-06-26 18:47:01 +00003140"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003141\n\
3142Return a copy of string S with all occurrences of substring\n\
3143old replaced by new. If the optional argument count is\n\
3144given, only the first count occurrences are replaced.");
3145
3146static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003147string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003148{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003149 Py_ssize_t count = -1;
3150 PyObject *from, *to;
3151 const char *from_s, *to_s;
3152 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00003153
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003154 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3155 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003156
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003157 if (PyString_Check(from)) {
3158 from_s = PyString_AS_STRING(from);
3159 from_len = PyString_GET_SIZE(from);
3160 }
Christian Heimes44720832008-05-26 13:01:01 +00003161#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003162 if (PyUnicode_Check(from))
3163 return PyUnicode_Replace((PyObject *)self,
3164 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00003165#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003166 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3167 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003168
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003169 if (PyString_Check(to)) {
3170 to_s = PyString_AS_STRING(to);
3171 to_len = PyString_GET_SIZE(to);
3172 }
Christian Heimes44720832008-05-26 13:01:01 +00003173#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003174 else if (PyUnicode_Check(to))
3175 return PyUnicode_Replace((PyObject *)self,
3176 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00003177#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003178 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3179 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003180
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003181 return (PyObject *)replace((PyStringObject *) self,
3182 from_s, from_len,
3183 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00003184}
3185
3186/** End DALKE **/
3187
3188/* Matches the end (direction >= 0) or start (direction < 0) of self
3189 * against substr, using the start and end arguments. Returns
3190 * -1 on error, 0 if not found and 1 if found.
3191 */
3192Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003193_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003194 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00003195{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003196 Py_ssize_t len = PyString_GET_SIZE(self);
3197 Py_ssize_t slen;
3198 const char* sub;
3199 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00003200
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003201 if (PyString_Check(substr)) {
3202 sub = PyString_AS_STRING(substr);
3203 slen = PyString_GET_SIZE(substr);
3204 }
Christian Heimes44720832008-05-26 13:01:01 +00003205#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003206 else if (PyUnicode_Check(substr))
3207 return PyUnicode_Tailmatch((PyObject *)self,
3208 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00003209#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003210 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3211 return -1;
3212 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003213
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003214 string_adjust_indices(&start, &end, len);
Christian Heimes44720832008-05-26 13:01:01 +00003215
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003216 if (direction < 0) {
3217 /* startswith */
3218 if (start+slen > len)
3219 return 0;
3220 } else {
3221 /* endswith */
3222 if (end-start < slen || start > len)
3223 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003224
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003225 if (end-slen > start)
3226 start = end - slen;
3227 }
3228 if (end-start >= slen)
3229 return ! memcmp(str+start, sub, slen);
3230 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003231}
3232
3233
3234PyDoc_STRVAR(startswith__doc__,
3235"S.startswith(prefix[, start[, end]]) -> bool\n\
3236\n\
3237Return True if S starts with the specified prefix, False otherwise.\n\
3238With optional start, test S beginning at that position.\n\
3239With optional end, stop comparing S at that position.\n\
3240prefix can also be a tuple of strings to try.");
3241
3242static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003243string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003244{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003245 Py_ssize_t start = 0;
3246 Py_ssize_t end = PY_SSIZE_T_MAX;
3247 PyObject *subobj;
3248 int result;
Christian Heimes44720832008-05-26 13:01:01 +00003249
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003250 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3251 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3252 return NULL;
3253 if (PyTuple_Check(subobj)) {
3254 Py_ssize_t i;
3255 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3256 result = _string_tailmatch(self,
3257 PyTuple_GET_ITEM(subobj, i),
3258 start, end, -1);
3259 if (result == -1)
3260 return NULL;
3261 else if (result) {
3262 Py_RETURN_TRUE;
3263 }
3264 }
3265 Py_RETURN_FALSE;
3266 }
3267 result = _string_tailmatch(self, subobj, start, end, -1);
3268 if (result == -1)
3269 return NULL;
3270 else
3271 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00003272}
3273
3274
3275PyDoc_STRVAR(endswith__doc__,
3276"S.endswith(suffix[, start[, end]]) -> bool\n\
3277\n\
3278Return True if S ends with the specified suffix, False otherwise.\n\
3279With optional start, test S beginning at that position.\n\
3280With optional end, stop comparing S at that position.\n\
3281suffix can also be a tuple of strings to try.");
3282
3283static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003284string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003285{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003286 Py_ssize_t start = 0;
3287 Py_ssize_t end = PY_SSIZE_T_MAX;
3288 PyObject *subobj;
3289 int result;
Christian Heimes44720832008-05-26 13:01:01 +00003290
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003291 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3292 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3293 return NULL;
3294 if (PyTuple_Check(subobj)) {
3295 Py_ssize_t i;
3296 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3297 result = _string_tailmatch(self,
3298 PyTuple_GET_ITEM(subobj, i),
3299 start, end, +1);
3300 if (result == -1)
3301 return NULL;
3302 else if (result) {
3303 Py_RETURN_TRUE;
3304 }
3305 }
3306 Py_RETURN_FALSE;
3307 }
3308 result = _string_tailmatch(self, subobj, start, end, +1);
3309 if (result == -1)
3310 return NULL;
3311 else
3312 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00003313}
3314
3315
3316PyDoc_STRVAR(encode__doc__,
3317"S.encode([encoding[,errors]]) -> object\n\
3318\n\
3319Encodes S using the codec registered for encoding. encoding defaults\n\
3320to the default encoding. errors may be given to set a different error\n\
3321handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3322a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3323'xmlcharrefreplace' as well as any other name registered with\n\
3324codecs.register_error that is able to handle UnicodeEncodeErrors.");
3325
3326static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003327string_encode(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003328{
3329 char *encoding = NULL;
3330 char *errors = NULL;
3331 PyObject *v;
3332
3333 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003334 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003335 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003336 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003337 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003338 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003339 PyErr_Format(PyExc_TypeError,
3340 "encoder did not return a string/unicode object "
3341 "(type=%.400s)",
3342 Py_TYPE(v)->tp_name);
3343 Py_DECREF(v);
3344 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003345 }
3346 return v;
3347
3348 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003349 return NULL;
3350}
3351
Christian Heimes44720832008-05-26 13:01:01 +00003352
3353PyDoc_STRVAR(decode__doc__,
3354"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003355\n\
Christian Heimes44720832008-05-26 13:01:01 +00003356Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003357to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003358handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3359a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003360as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003361able to handle UnicodeDecodeErrors.");
3362
3363static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003364string_decode(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003365{
Christian Heimes44720832008-05-26 13:01:01 +00003366 char *encoding = NULL;
3367 char *errors = NULL;
3368 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003369
3370 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003371 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003372 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003373 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003374 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003375 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003376 PyErr_Format(PyExc_TypeError,
3377 "decoder did not return a string/unicode object "
3378 "(type=%.400s)",
3379 Py_TYPE(v)->tp_name);
3380 Py_DECREF(v);
3381 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003382 }
3383 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003384
Christian Heimes44720832008-05-26 13:01:01 +00003385 onError:
3386 return NULL;
3387}
3388
3389
3390PyDoc_STRVAR(expandtabs__doc__,
3391"S.expandtabs([tabsize]) -> string\n\
3392\n\
3393Return a copy of S where all tab characters are expanded using spaces.\n\
3394If tabsize is not given, a tab size of 8 characters is assumed.");
3395
3396static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003397string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003398{
3399 const char *e, *p, *qe;
3400 char *q;
3401 Py_ssize_t i, j, incr;
3402 PyObject *u;
3403 int tabsize = 8;
3404
3405 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003406 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003407
3408 /* First pass: determine size of output string */
3409 i = 0; /* chars up to and including most recent \n or \r */
3410 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003411 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3412 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003413 if (*p == '\t') {
3414 if (tabsize > 0) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003415 incr = tabsize - (j % tabsize);
3416 if (j > PY_SSIZE_T_MAX - incr)
3417 goto overflow1;
3418 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003419 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003420 }
3421 else {
3422 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003423 goto overflow1;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003424 j++;
3425 if (*p == '\n' || *p == '\r') {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003426 if (i > PY_SSIZE_T_MAX - j)
3427 goto overflow1;
3428 i += j;
3429 j = 0;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003430 }
3431 }
Christian Heimes44720832008-05-26 13:01:01 +00003432
3433 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003434 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003435
3436 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003437 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003438 if (!u)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003439 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003440
3441 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003442 q = PyString_AS_STRING(u); /* next output char */
3443 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003444
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003445 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003446 if (*p == '\t') {
3447 if (tabsize > 0) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003448 i = tabsize - (j % tabsize);
3449 j += i;
3450 while (i--) {
3451 if (q >= qe)
3452 goto overflow2;
3453 *q++ = ' ';
3454 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003455 }
3456 }
3457 else {
3458 if (q >= qe)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003459 goto overflow2;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003460 *q++ = *p;
3461 j++;
3462 if (*p == '\n' || *p == '\r')
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003463 j = 0;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003464 }
Christian Heimes44720832008-05-26 13:01:01 +00003465
3466 return u;
3467
3468 overflow2:
3469 Py_DECREF(u);
3470 overflow1:
3471 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3472 return NULL;
3473}
3474
3475Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003476pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003477{
3478 PyObject *u;
3479
3480 if (left < 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003481 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003482 if (right < 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003483 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003484
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003485 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003486 Py_INCREF(self);
3487 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003488 }
3489
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003490 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003491 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003492 if (u) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003493 if (left)
3494 memset(PyString_AS_STRING(u), fill, left);
3495 Py_MEMCPY(PyString_AS_STRING(u) + left,
3496 PyString_AS_STRING(self),
3497 PyString_GET_SIZE(self));
3498 if (right)
3499 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3500 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003501 }
3502
3503 return u;
3504}
3505
3506PyDoc_STRVAR(ljust__doc__,
3507"S.ljust(width[, fillchar]) -> string\n"
3508"\n"
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003509"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003510"done using the specified fill character (default is a space).");
3511
3512static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003513string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003514{
3515 Py_ssize_t width;
3516 char fillchar = ' ';
3517
3518 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003519 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003520
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003521 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003522 Py_INCREF(self);
3523 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003524 }
3525
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003526 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003527}
3528
3529
3530PyDoc_STRVAR(rjust__doc__,
3531"S.rjust(width[, fillchar]) -> string\n"
3532"\n"
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003533"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003534"done using the specified fill character (default is a space)");
3535
3536static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003537string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003538{
3539 Py_ssize_t width;
3540 char fillchar = ' ';
3541
3542 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003543 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003544
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003545 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003546 Py_INCREF(self);
3547 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003548 }
3549
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003550 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003551}
3552
3553
3554PyDoc_STRVAR(center__doc__,
3555"S.center(width[, fillchar]) -> string\n"
3556"\n"
3557"Return S centered in a string of length width. Padding is\n"
3558"done using the specified fill character (default is a space)");
3559
3560static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003561string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003562{
3563 Py_ssize_t marg, left;
3564 Py_ssize_t width;
3565 char fillchar = ' ';
3566
3567 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003568 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003569
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003570 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003571 Py_INCREF(self);
3572 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003573 }
3574
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003575 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003576 left = marg / 2 + (marg & width & 1);
3577
3578 return pad(self, left, marg - left, fillchar);
3579}
3580
3581PyDoc_STRVAR(zfill__doc__,
3582"S.zfill(width) -> string\n"
3583"\n"
3584"Pad a numeric string S with zeros on the left, to fill a field\n"
3585"of the specified width. The string S is never truncated.");
3586
3587static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003588string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003589{
3590 Py_ssize_t fill;
3591 PyObject *s;
3592 char *p;
3593 Py_ssize_t width;
3594
3595 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003596 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003597
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003598 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003599 if (PyString_CheckExact(self)) {
3600 Py_INCREF(self);
3601 return (PyObject*) self;
3602 }
3603 else
3604 return PyString_FromStringAndSize(
3605 PyString_AS_STRING(self),
3606 PyString_GET_SIZE(self)
3607 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003608 }
3609
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003610 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003611
Christian Heimes44720832008-05-26 13:01:01 +00003612 s = pad(self, fill, 0, '0');
3613
3614 if (s == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003615 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003616
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003617 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003618 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003619 /* move sign to beginning of string */
3620 p[0] = p[fill];
3621 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003622 }
3623
3624 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003625}
3626
Christian Heimes44720832008-05-26 13:01:01 +00003627PyDoc_STRVAR(isspace__doc__,
3628"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003629\n\
Christian Heimes44720832008-05-26 13:01:01 +00003630Return True if all characters in S are whitespace\n\
3631and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003632
Christian Heimes44720832008-05-26 13:01:01 +00003633static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003634string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003635{
Christian Heimes44720832008-05-26 13:01:01 +00003636 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003637 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003638 register const unsigned char *e;
3639
3640 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003641 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003642 isspace(*p))
3643 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003644
3645 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003646 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003647 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003648
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003649 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003650 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003651 if (!isspace(*p))
3652 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003653 }
Christian Heimes44720832008-05-26 13:01:01 +00003654 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003655}
3656
Christian Heimes44720832008-05-26 13:01:01 +00003657
3658PyDoc_STRVAR(isalpha__doc__,
3659"S.isalpha() -> bool\n\
3660\n\
3661Return True if all characters in S are alphabetic\n\
3662and there is at least one character in S, False otherwise.");
3663
3664static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003665string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003666{
Christian Heimes44720832008-05-26 13:01:01 +00003667 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003668 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003669 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003670
Christian Heimes44720832008-05-26 13:01:01 +00003671 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003672 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003673 isalpha(*p))
3674 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003675
3676 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003677 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003678 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003679
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003680 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003681 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003682 if (!isalpha(*p))
3683 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003684 }
Christian Heimes44720832008-05-26 13:01:01 +00003685 return PyBool_FromLong(1);
3686}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003687
Christian Heimes44720832008-05-26 13:01:01 +00003688
3689PyDoc_STRVAR(isalnum__doc__,
3690"S.isalnum() -> bool\n\
3691\n\
3692Return True if all characters in S are alphanumeric\n\
3693and there is at least one character in S, False otherwise.");
3694
3695static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003696string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003697{
3698 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003699 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003700 register const unsigned char *e;
3701
3702 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003703 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003704 isalnum(*p))
3705 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003706
3707 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003708 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003709 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003710
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003711 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003712 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003713 if (!isalnum(*p))
3714 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003715 }
3716 return PyBool_FromLong(1);
3717}
3718
3719
3720PyDoc_STRVAR(isdigit__doc__,
3721"S.isdigit() -> bool\n\
3722\n\
3723Return True if all characters in S are digits\n\
3724and there is at least one character in S, False otherwise.");
3725
3726static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003727string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003728{
3729 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003730 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003731 register const unsigned char *e;
3732
3733 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003734 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003735 isdigit(*p))
3736 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003737
3738 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003739 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003740 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003741
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003742 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003743 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003744 if (!isdigit(*p))
3745 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003746 }
3747 return PyBool_FromLong(1);
3748}
3749
3750
3751PyDoc_STRVAR(islower__doc__,
3752"S.islower() -> bool\n\
3753\n\
3754Return True if all cased characters in S are lowercase and there is\n\
3755at least one cased character in S, False otherwise.");
3756
3757static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003758string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003759{
3760 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003761 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003762 register const unsigned char *e;
3763 int cased;
3764
3765 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003766 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003767 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003768
3769 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003770 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003771 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003772
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003773 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003774 cased = 0;
3775 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003776 if (isupper(*p))
3777 return PyBool_FromLong(0);
3778 else if (!cased && islower(*p))
3779 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003780 }
3781 return PyBool_FromLong(cased);
3782}
3783
3784
3785PyDoc_STRVAR(isupper__doc__,
3786"S.isupper() -> bool\n\
3787\n\
3788Return True if all cased characters in S are uppercase and there is\n\
3789at least one cased character in S, False otherwise.");
3790
3791static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003792string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003793{
3794 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003795 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003796 register const unsigned char *e;
3797 int cased;
3798
3799 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003800 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003801 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003802
3803 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003804 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003805 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003806
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003807 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003808 cased = 0;
3809 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003810 if (islower(*p))
3811 return PyBool_FromLong(0);
3812 else if (!cased && isupper(*p))
3813 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003814 }
3815 return PyBool_FromLong(cased);
3816}
3817
3818
3819PyDoc_STRVAR(istitle__doc__,
3820"S.istitle() -> bool\n\
3821\n\
3822Return True if S is a titlecased string and there is at least one\n\
3823character in S, i.e. uppercase characters may only follow uncased\n\
3824characters and lowercase characters only cased ones. Return False\n\
3825otherwise.");
3826
3827static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003828string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003829{
3830 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003831 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003832 register const unsigned char *e;
3833 int cased, previous_is_cased;
3834
3835 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003836 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003837 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003838
3839 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003840 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003841 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003842
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003843 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003844 cased = 0;
3845 previous_is_cased = 0;
3846 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003847 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003848
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003849 if (isupper(ch)) {
3850 if (previous_is_cased)
3851 return PyBool_FromLong(0);
3852 previous_is_cased = 1;
3853 cased = 1;
3854 }
3855 else if (islower(ch)) {
3856 if (!previous_is_cased)
3857 return PyBool_FromLong(0);
3858 previous_is_cased = 1;
3859 cased = 1;
3860 }
3861 else
3862 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003863 }
3864 return PyBool_FromLong(cased);
3865}
3866
3867
3868PyDoc_STRVAR(splitlines__doc__,
3869"S.splitlines([keepends]) -> list of strings\n\
3870\n\
3871Return a list of the lines in S, breaking at line boundaries.\n\
3872Line breaks are not included in the resulting list unless keepends\n\
3873is given and true.");
3874
3875static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003876string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003877{
3878 register Py_ssize_t i;
3879 register Py_ssize_t j;
3880 Py_ssize_t len;
3881 int keepends = 0;
3882 PyObject *list;
3883 PyObject *str;
3884 char *data;
3885
3886 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003887 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003888
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003889 data = PyString_AS_STRING(self);
3890 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003891
3892 /* This does not use the preallocated list because splitlines is
3893 usually run with hundreds of newlines. The overhead of
3894 switching between PyList_SET_ITEM and append causes about a
3895 2-3% slowdown for that common case. A smarter implementation
3896 could move the if check out, so the SET_ITEMs are done first
3897 and the appends only done when the prealloc buffer is full.
3898 That's too much work for little gain.*/
3899
3900 list = PyList_New(0);
3901 if (!list)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003902 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +00003903
3904 for (i = j = 0; i < len; ) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003905 Py_ssize_t eol;
Christian Heimes44720832008-05-26 13:01:01 +00003906
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003907 /* Find a line and append it */
3908 while (i < len && data[i] != '\n' && data[i] != '\r')
3909 i++;
Christian Heimes44720832008-05-26 13:01:01 +00003910
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003911 /* Skip the line break reading CRLF as one line break */
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003912 eol = i;
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003913 if (i < len) {
3914 if (data[i] == '\r' && i + 1 < len &&
3915 data[i+1] == '\n')
3916 i += 2;
3917 else
3918 i++;
3919 if (keepends)
3920 eol = i;
3921 }
3922 SPLIT_APPEND(data, j, eol);
3923 j = i;
Christian Heimes44720832008-05-26 13:01:01 +00003924 }
3925 if (j < len) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003926 SPLIT_APPEND(data, j, len);
Christian Heimes44720832008-05-26 13:01:01 +00003927 }
3928
3929 return list;
3930
3931 onError:
3932 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003933 return NULL;
3934}
3935
Robert Schuppenies51df0642008-06-01 16:16:17 +00003936PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003937"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003938
3939static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003940string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003941{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003942 Py_ssize_t res;
3943 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
3944 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003945}
3946
Christian Heimes44720832008-05-26 13:01:01 +00003947#undef SPLIT_APPEND
3948#undef SPLIT_ADD
3949#undef MAX_PREALLOC
3950#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003951
3952static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003953string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003954{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003955 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003956}
3957
Christian Heimes1a6387e2008-03-26 12:49:49 +00003958
Christian Heimes44720832008-05-26 13:01:01 +00003959#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003960
Christian Heimes44720832008-05-26 13:01:01 +00003961PyDoc_STRVAR(format__doc__,
Georg Brandlc5356992010-08-01 22:02:09 +00003962"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003963\n\
3964");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003965
Eric Smithdc13b792008-05-30 18:10:04 +00003966static PyObject *
3967string__format__(PyObject* self, PyObject* args)
3968{
3969 PyObject *format_spec;
3970 PyObject *result = NULL;
3971 PyObject *tmp = NULL;
3972
3973 /* If 2.x, convert format_spec to the same type as value */
3974 /* This is to allow things like u''.format('') */
3975 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003976 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003977 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003978 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3979 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3980 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003981 }
3982 tmp = PyObject_Str(format_spec);
3983 if (tmp == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003984 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003985 format_spec = tmp;
3986
3987 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003988 PyString_AS_STRING(format_spec),
3989 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003990done:
3991 Py_XDECREF(tmp);
3992 return result;
3993}
3994
Christian Heimes44720832008-05-26 13:01:01 +00003995PyDoc_STRVAR(p_format__doc__,
Georg Brandlc5356992010-08-01 22:02:09 +00003996"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003997\n\
3998");
3999
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00004000
Christian Heimes1a6387e2008-03-26 12:49:49 +00004001static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00004002string_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004003 /* Counterparts of the obsolete stropmodule functions; except
4004 string.maketrans(). */
4005 {"join", (PyCFunction)string_join, METH_O, join__doc__},
4006 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4007 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4008 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4009 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4010 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4011 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4012 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4013 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4014 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4015 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4016 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4017 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4018 capitalize__doc__},
4019 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4020 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4021 endswith__doc__},
4022 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4023 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4024 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4025 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4026 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4027 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4028 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4029 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4030 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4031 rpartition__doc__},
4032 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4033 startswith__doc__},
4034 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4035 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4036 swapcase__doc__},
4037 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4038 translate__doc__},
4039 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4040 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4041 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4042 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4043 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4044 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4045 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4046 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4047 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4048 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4049 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4050 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4051 expandtabs__doc__},
4052 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4053 splitlines__doc__},
4054 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4055 sizeof__doc__},
4056 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4057 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004058};
4059
4060static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004061str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004062
Christian Heimes44720832008-05-26 13:01:01 +00004063static PyObject *
4064string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4065{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004066 PyObject *x = NULL;
4067 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00004068
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004069 if (type != &PyString_Type)
4070 return str_subtype_new(type, args, kwds);
4071 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4072 return NULL;
4073 if (x == NULL)
4074 return PyString_FromString("");
4075 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00004076}
4077
4078static PyObject *
4079str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4080{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004081 PyObject *tmp, *pnew;
4082 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00004083
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004084 assert(PyType_IsSubtype(type, &PyString_Type));
4085 tmp = string_new(&PyString_Type, args, kwds);
4086 if (tmp == NULL)
4087 return NULL;
4088 assert(PyString_CheckExact(tmp));
4089 n = PyString_GET_SIZE(tmp);
4090 pnew = type->tp_alloc(type, n);
4091 if (pnew != NULL) {
4092 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4093 ((PyStringObject *)pnew)->ob_shash =
4094 ((PyStringObject *)tmp)->ob_shash;
4095 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4096 }
4097 Py_DECREF(tmp);
4098 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00004099}
4100
/* tp_new for basestring: always fails.  Sets a TypeError and returns
 * NULL; none of the arguments are examined.
 */
static PyObject *
basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    PyErr_SetString(PyExc_TypeError,
                    "The basestring type cannot be instantiated");
    return NULL;
}
4108
4109static PyObject *
4110string_mod(PyObject *v, PyObject *w)
4111{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004112 if (!PyString_Check(v)) {
4113 Py_INCREF(Py_NotImplemented);
4114 return Py_NotImplemented;
4115 }
4116 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004117}
4118
/* Docstring installed as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
4121
/* Number protocol table for str.  Only nb_remainder is populated (with
 * string_mod); the slots after it are left out of the initializer and
 * are therefore zero-initialized.
 */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
4129
4130
/* Type object for `basestring'.  Almost every slot is empty: the type
 * only supplies a docstring, a base (object) and a tp_new that refuses
 * instantiation (basestring_new).  Py_TPFLAGS_BASETYPE allows it to be
 * subclassed.
 */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
4172
PyDoc_STRVAR(string_doc,
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for `str'.  Instances are variable-sized: the basic size
 * is sizeof(PyStringObject) and each item is one char.  The type derives
 * from PyBaseString_Type and wires in the number, sequence, mapping and
 * buffer protocol tables together with string_methods.
 */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
4222
4223void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004224PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004225{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004226 register PyObject *v;
4227 if (*pv == NULL)
4228 return;
4229 if (w == NULL || !PyString_Check(*pv)) {
4230 Py_DECREF(*pv);
4231 *pv = NULL;
4232 return;
4233 }
4234 v = string_concat((PyStringObject *) *pv, w);
4235 Py_DECREF(*pv);
4236 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00004237}
4238
/* Same as PyString_Concat(pv, w), but additionally drops one reference
 * to w afterwards.  w may be NULL (Py_XDECREF is used).
 */
void
PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
{
    PyString_Concat(pv, w);
    Py_XDECREF(w);
}
4245
4246
/* The following function breaks the notion that strings are immutable:
   it changes the size of a string.  We get away with this only if there
   is only one module referencing the object.  You can also think of it
   as creating a new string object and destroying the old one, only
   more efficiently.  In any case, don't use this if the string may
   already be known to some other part of the code...
   Note that if there's not enough memory to resize the string, the original
   string object at *pv is deallocated, *pv is set to NULL, an "out of
   memory" exception is set, and -1 is returned.  Else (on success) 0 is
   returned, and the value in *pv may or may not be the same as on input.
   As always, an extra byte is allocated for a trailing \0 byte (newsize
   does *not* include that), and a trailing \0 byte is stored.

   The call is also rejected (reference to *pv dropped, *pv set to NULL,
   PyErr_BadInternalCall() raised, -1 returned) when *pv is not a string,
   its reference count is not exactly 1, newsize is negative, or the
   string is interned.
*/

int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyStringObject *sv;
    v = *pv;
    /* Precondition check: resizing is only legal for an un-interned
       string with a single owner and a non-negative new size. */
    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
        PyString_CHECK_INTERNED(v)) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* NOTE(review): the two calls below look like the reference
       bookkeeping that is undone by _Py_NewReference() further down --
       confirm against object.h. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
    if (*pv == NULL) {
        /* The reallocation failed: *pv is already NULL from the
           assignment above, so only the old block has to be released. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyStringObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
4291
4292/* Helpers for formatstring */
4293
4294Py_LOCAL_INLINE(PyObject *)
4295getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4296{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004297 Py_ssize_t argidx = *p_argidx;
4298 if (argidx < arglen) {
4299 (*p_argidx)++;
4300 if (arglen < 0)
4301 return args;
4302 else
4303 return PyTuple_GetItem(args, argidx);
4304 }
4305 PyErr_SetString(PyExc_TypeError,
4306 "not enough arguments for format string");
4307 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004308}
4309
/* Format codes
 * F_LJUST      '-'
 * F_SIGN       '+'
 * F_BLANK      ' '
 * F_ALT        '#'
 * F_ZERO       '0'
 *
 * Each flag occupies its own bit, so several of them can be OR-ed
 * together into a single `flags' value and tested with `&'.
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
Christian Heimes44720832008-05-26 13:01:01 +00004322
4323Py_LOCAL_INLINE(int)
4324formatfloat(char *buf, size_t buflen, int flags,
4325 int prec, int type, PyObject *v)
4326{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004327 /* fmt = '%#.' + `prec` + `type`
4328 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4329 char fmt[20];
4330 double x;
4331 x = PyFloat_AsDouble(v);
4332 if (x == -1.0 && PyErr_Occurred()) {
4333 PyErr_Format(PyExc_TypeError, "float argument required, "
4334 "not %.200s", Py_TYPE(v)->tp_name);
4335 return -1;
4336 }
4337 if (prec < 0)
4338 prec = 6;
Mark Dickinson75be68b2009-08-28 20:57:42 +00004339#if SIZEOF_INT > 4
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004340 /* make sure that the decimal representation of precision really does
4341 need at most 10 digits: platforms with sizeof(int) == 8 exist! */
4342 if (prec > 0x7fffffff) {
4343 PyErr_SetString(PyExc_OverflowError,
4344 "outrageously large precision "
4345 "for formatted float");
4346 return -1;
4347 }
Mark Dickinson75be68b2009-08-28 20:57:42 +00004348#endif
Mark Dickinson87886192009-03-29 16:18:33 +00004349
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004350 if (type == 'f' && fabs(x) >= 1e50)
4351 type = 'g';
4352 /* Worst case length calc to ensure no buffer overrun:
Christian Heimes44720832008-05-26 13:01:01 +00004353
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004354 'g' formats:
4355 fmt = %#.<prec>g
4356 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4357 for any double rep.)
4358 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Christian Heimes44720832008-05-26 13:01:01 +00004359
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004360 'f' formats:
4361 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4362 len = 1 + 50 + 1 + prec = 52 + prec
Christian Heimes44720832008-05-26 13:01:01 +00004363
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004364 If prec=0 the effective precision is 1 (the leading digit is
4365 always given), therefore increase the length by one.
Christian Heimes44720832008-05-26 13:01:01 +00004366
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004367 */
4368 if (((type == 'g' || type == 'G') &&
4369 buflen <= (size_t)10 + (size_t)prec) ||
4370 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4371 PyErr_SetString(PyExc_OverflowError,
4372 "formatted float is too long (precision too large?)");
4373 return -1;
4374 }
4375 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4376 (flags&F_ALT) ? "#" : "",
4377 prec, type);
4378 PyOS_ascii_formatd(buf, buflen, fmt, x);
4379 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004380}
4381
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004382/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004383 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4384 * Python's regular ints.
4385 * Return value: a new PyString*, or NULL if error.
4386 * . *pbuf is set to point into it,
4387 * *plen set to the # of chars following that.
4388 * Caller must decref it when done using pbuf.
4389 * The string starting at *pbuf is of the form
4390 * "-"? ("0x" | "0X")? digit+
4391 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4392 * set in flags. The case of hex digits will be correct,
4393 * There will be at least prec digits, zero-filled on the left if
4394 * necessary to get that many.
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004395 * val object to be converted
4396 * flags bitmask of format flags; only F_ALT is looked at
4397 * prec minimum number of digits; 0-fill on left if needed
4398 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00004399 *
4400 * CAUTION: o, x and X conversions on regular ints can never
4401 * produce a '-' sign, but can for Python's unbounded ints.
4402 */
4403PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004404_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004405 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004406{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004407 PyObject *result = NULL;
4408 char *buf;
4409 Py_ssize_t i;
4410 int sign; /* 1 if '-', else 0 */
4411 int len; /* number of characters */
4412 Py_ssize_t llen;
4413 int numdigits; /* len == numnondigits + numdigits */
4414 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004415
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004416 switch (type) {
4417 case 'd':
4418 case 'u':
4419 result = Py_TYPE(val)->tp_str(val);
4420 break;
4421 case 'o':
4422 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4423 break;
4424 case 'x':
4425 case 'X':
4426 numnondigits = 2;
4427 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4428 break;
4429 default:
4430 assert(!"'type' not in [duoxX]");
4431 }
4432 if (!result)
4433 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004434
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004435 buf = PyString_AsString(result);
4436 if (!buf) {
4437 Py_DECREF(result);
4438 return NULL;
4439 }
Christian Heimes44720832008-05-26 13:01:01 +00004440
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004441 /* To modify the string in-place, there can only be one reference. */
4442 if (Py_REFCNT(result) != 1) {
4443 PyErr_BadInternalCall();
4444 return NULL;
4445 }
4446 llen = PyString_Size(result);
4447 if (llen > INT_MAX) {
4448 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4449 return NULL;
4450 }
4451 len = (int)llen;
4452 if (buf[len-1] == 'L') {
4453 --len;
4454 buf[len] = '\0';
4455 }
4456 sign = buf[0] == '-';
4457 numnondigits += sign;
4458 numdigits = len - numnondigits;
4459 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004460
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004461 /* Get rid of base marker unless F_ALT */
4462 if ((flags & F_ALT) == 0) {
4463 /* Need to skip 0x, 0X or 0. */
4464 int skipped = 0;
4465 switch (type) {
4466 case 'o':
4467 assert(buf[sign] == '0');
4468 /* If 0 is only digit, leave it alone. */
4469 if (numdigits > 1) {
4470 skipped = 1;
4471 --numdigits;
4472 }
4473 break;
4474 case 'x':
4475 case 'X':
4476 assert(buf[sign] == '0');
4477 assert(buf[sign + 1] == 'x');
4478 skipped = 2;
4479 numnondigits -= 2;
4480 break;
4481 }
4482 if (skipped) {
4483 buf += skipped;
4484 len -= skipped;
4485 if (sign)
4486 buf[0] = '-';
4487 }
4488 assert(len == numnondigits + numdigits);
4489 assert(numdigits > 0);
4490 }
Christian Heimes44720832008-05-26 13:01:01 +00004491
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004492 /* Fill with leading zeroes to meet minimum width. */
4493 if (prec > numdigits) {
4494 PyObject *r1 = PyString_FromStringAndSize(NULL,
4495 numnondigits + prec);
4496 char *b1;
4497 if (!r1) {
4498 Py_DECREF(result);
4499 return NULL;
4500 }
4501 b1 = PyString_AS_STRING(r1);
4502 for (i = 0; i < numnondigits; ++i)
4503 *b1++ = *buf++;
4504 for (i = 0; i < prec - numdigits; i++)
4505 *b1++ = '0';
4506 for (i = 0; i < numdigits; i++)
4507 *b1++ = *buf++;
4508 *b1 = '\0';
4509 Py_DECREF(result);
4510 result = r1;
4511 buf = PyString_AS_STRING(result);
4512 len = numnondigits + prec;
4513 }
Christian Heimes44720832008-05-26 13:01:01 +00004514
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004515 /* Fix up case for hex conversions. */
4516 if (type == 'X') {
4517 /* Need to convert all lower case letters to upper case.
4518 and need to convert 0x to 0X (and -0x to -0X). */
4519 for (i = 0; i < len; i++)
4520 if (buf[i] >= 'a' && buf[i] <= 'x')
4521 buf[i] -= 'a'-'A';
4522 }
4523 *pbuf = buf;
4524 *plen = len;
4525 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004526}
4527
4528Py_LOCAL_INLINE(int)
4529formatint(char *buf, size_t buflen, int flags,
4530 int prec, int type, PyObject *v)
4531{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004532 /* fmt = '%#.' + `prec` + 'l' + `type`
4533 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4534 + 1 + 1 = 24 */
4535 char fmt[64]; /* plenty big enough! */
4536 char *sign;
4537 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004538
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004539 x = PyInt_AsLong(v);
4540 if (x == -1 && PyErr_Occurred()) {
4541 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4542 Py_TYPE(v)->tp_name);
4543 return -1;
4544 }
4545 if (x < 0 && type == 'u') {
4546 type = 'd';
4547 }
4548 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4549 sign = "-";
4550 else
4551 sign = "";
4552 if (prec < 0)
4553 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004554
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004555 if ((flags & F_ALT) &&
4556 (type == 'x' || type == 'X')) {
4557 /* When converting under %#x or %#X, there are a number
4558 * of issues that cause pain:
4559 * - when 0 is being converted, the C standard leaves off
4560 * the '0x' or '0X', which is inconsistent with other
4561 * %#x/%#X conversions and inconsistent with Python's
4562 * hex() function
4563 * - there are platforms that violate the standard and
4564 * convert 0 with the '0x' or '0X'
4565 * (Metrowerks, Compaq Tru64)
4566 * - there are platforms that give '0x' when converting
4567 * under %#X, but convert 0 in accordance with the
4568 * standard (OS/2 EMX)
4569 *
4570 * We can achieve the desired consistency by inserting our
4571 * own '0x' or '0X' prefix, and substituting %x/%X in place
4572 * of %#x/%#X.
4573 *
4574 * Note that this is the same approach as used in
4575 * formatint() in unicodeobject.c
4576 */
4577 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4578 sign, type, prec, type);
4579 }
4580 else {
4581 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4582 sign, (flags&F_ALT) ? "#" : "",
4583 prec, type);
4584 }
Christian Heimes44720832008-05-26 13:01:01 +00004585
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004586 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4587 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4588 */
4589 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4590 PyErr_SetString(PyExc_OverflowError,
4591 "formatted integer is too long (precision too large?)");
4592 return -1;
4593 }
4594 if (sign[0])
4595 PyOS_snprintf(buf, buflen, fmt, -x);
4596 else
4597 PyOS_snprintf(buf, buflen, fmt, x);
4598 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004599}
4600
4601Py_LOCAL_INLINE(int)
4602formatchar(char *buf, size_t buflen, PyObject *v)
4603{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004604 /* presume that the buffer is at least 2 characters long */
4605 if (PyString_Check(v)) {
4606 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4607 return -1;
4608 }
4609 else {
4610 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4611 return -1;
4612 }
4613 buf[1] = '\0';
4614 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004615}
4616
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast binds to the
   constant no matter where the macro is used (e.g. as a sizeof operand).
*/
#define FORMATBUFLEN ((size_t)120)
4626
4627PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004628PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004629{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004630 char *fmt, *res;
4631 Py_ssize_t arglen, argidx;
4632 Py_ssize_t reslen, rescnt, fmtcnt;
4633 int args_owned = 0;
4634 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004635#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004636 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004637#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004638 PyObject *dict = NULL;
4639 if (format == NULL || !PyString_Check(format) || args == NULL) {
4640 PyErr_BadInternalCall();
4641 return NULL;
4642 }
4643 orig_args = args;
4644 fmt = PyString_AS_STRING(format);
4645 fmtcnt = PyString_GET_SIZE(format);
4646 reslen = rescnt = fmtcnt + 100;
4647 result = PyString_FromStringAndSize((char *)NULL, reslen);
4648 if (result == NULL)
4649 return NULL;
4650 res = PyString_AsString(result);
4651 if (PyTuple_Check(args)) {
4652 arglen = PyTuple_GET_SIZE(args);
4653 argidx = 0;
4654 }
4655 else {
4656 arglen = -1;
4657 argidx = -2;
4658 }
4659 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4660 !PyObject_TypeCheck(args, &PyBaseString_Type))
4661 dict = args;
4662 while (--fmtcnt >= 0) {
4663 if (*fmt != '%') {
4664 if (--rescnt < 0) {
4665 rescnt = fmtcnt + 100;
4666 reslen += rescnt;
4667 if (_PyString_Resize(&result, reslen) < 0)
4668 return NULL;
4669 res = PyString_AS_STRING(result)
4670 + reslen - rescnt;
4671 --rescnt;
4672 }
4673 *res++ = *fmt++;
4674 }
4675 else {
4676 /* Got a format specifier */
4677 int flags = 0;
4678 Py_ssize_t width = -1;
4679 int prec = -1;
4680 int c = '\0';
4681 int fill;
4682 int isnumok;
4683 PyObject *v = NULL;
4684 PyObject *temp = NULL;
4685 char *pbuf;
4686 int sign;
4687 Py_ssize_t len;
4688 char formatbuf[FORMATBUFLEN];
4689 /* For format{float,int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004690#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004691 char *fmt_start = fmt;
4692 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004693#endif
4694
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004695 fmt++;
4696 if (*fmt == '(') {
4697 char *keystart;
4698 Py_ssize_t keylen;
4699 PyObject *key;
4700 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004701
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004702 if (dict == NULL) {
4703 PyErr_SetString(PyExc_TypeError,
4704 "format requires a mapping");
4705 goto error;
4706 }
4707 ++fmt;
4708 --fmtcnt;
4709 keystart = fmt;
4710 /* Skip over balanced parentheses */
4711 while (pcount > 0 && --fmtcnt >= 0) {
4712 if (*fmt == ')')
4713 --pcount;
4714 else if (*fmt == '(')
4715 ++pcount;
4716 fmt++;
4717 }
4718 keylen = fmt - keystart - 1;
4719 if (fmtcnt < 0 || pcount > 0) {
4720 PyErr_SetString(PyExc_ValueError,
4721 "incomplete format key");
4722 goto error;
4723 }
4724 key = PyString_FromStringAndSize(keystart,
4725 keylen);
4726 if (key == NULL)
4727 goto error;
4728 if (args_owned) {
4729 Py_DECREF(args);
4730 args_owned = 0;
4731 }
4732 args = PyObject_GetItem(dict, key);
4733 Py_DECREF(key);
4734 if (args == NULL) {
4735 goto error;
4736 }
4737 args_owned = 1;
4738 arglen = -1;
4739 argidx = -2;
4740 }
4741 while (--fmtcnt >= 0) {
4742 switch (c = *fmt++) {
4743 case '-': flags |= F_LJUST; continue;
4744 case '+': flags |= F_SIGN; continue;
4745 case ' ': flags |= F_BLANK; continue;
4746 case '#': flags |= F_ALT; continue;
4747 case '0': flags |= F_ZERO; continue;
4748 }
4749 break;
4750 }
4751 if (c == '*') {
4752 v = getnextarg(args, arglen, &argidx);
4753 if (v == NULL)
4754 goto error;
4755 if (!PyInt_Check(v)) {
4756 PyErr_SetString(PyExc_TypeError,
4757 "* wants int");
4758 goto error;
4759 }
4760 width = PyInt_AsLong(v);
4761 if (width < 0) {
4762 flags |= F_LJUST;
4763 width = -width;
4764 }
4765 if (--fmtcnt >= 0)
4766 c = *fmt++;
4767 }
4768 else if (c >= 0 && isdigit(c)) {
4769 width = c - '0';
4770 while (--fmtcnt >= 0) {
4771 c = Py_CHARMASK(*fmt++);
4772 if (!isdigit(c))
4773 break;
4774 if ((width*10) / 10 != width) {
4775 PyErr_SetString(
4776 PyExc_ValueError,
4777 "width too big");
4778 goto error;
4779 }
4780 width = width*10 + (c - '0');
4781 }
4782 }
4783 if (c == '.') {
4784 prec = 0;
4785 if (--fmtcnt >= 0)
4786 c = *fmt++;
4787 if (c == '*') {
4788 v = getnextarg(args, arglen, &argidx);
4789 if (v == NULL)
4790 goto error;
4791 if (!PyInt_Check(v)) {
4792 PyErr_SetString(
4793 PyExc_TypeError,
4794 "* wants int");
4795 goto error;
4796 }
4797 prec = PyInt_AsLong(v);
4798 if (prec < 0)
4799 prec = 0;
4800 if (--fmtcnt >= 0)
4801 c = *fmt++;
4802 }
4803 else if (c >= 0 && isdigit(c)) {
4804 prec = c - '0';
4805 while (--fmtcnt >= 0) {
4806 c = Py_CHARMASK(*fmt++);
4807 if (!isdigit(c))
4808 break;
4809 if ((prec*10) / 10 != prec) {
4810 PyErr_SetString(
4811 PyExc_ValueError,
4812 "prec too big");
4813 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004814 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004815 prec = prec*10 + (c - '0');
4816 }
4817 }
4818 } /* prec */
4819 if (fmtcnt >= 0) {
4820 if (c == 'h' || c == 'l' || c == 'L') {
4821 if (--fmtcnt >= 0)
4822 c = *fmt++;
4823 }
4824 }
4825 if (fmtcnt < 0) {
4826 PyErr_SetString(PyExc_ValueError,
4827 "incomplete format");
4828 goto error;
4829 }
4830 if (c != '%') {
4831 v = getnextarg(args, arglen, &argidx);
4832 if (v == NULL)
4833 goto error;
4834 }
4835 sign = 0;
4836 fill = ' ';
4837 switch (c) {
4838 case '%':
4839 pbuf = "%";
4840 len = 1;
4841 break;
4842 case 's':
4843#ifdef Py_USING_UNICODE
4844 if (PyUnicode_Check(v)) {
4845 fmt = fmt_start;
4846 argidx = argidx_start;
4847 goto unicode;
4848 }
4849#endif
4850 temp = _PyObject_Str(v);
4851#ifdef Py_USING_UNICODE
4852 if (temp != NULL && PyUnicode_Check(temp)) {
4853 Py_DECREF(temp);
4854 fmt = fmt_start;
4855 argidx = argidx_start;
4856 goto unicode;
4857 }
4858#endif
4859 /* Fall through */
4860 case 'r':
4861 if (c == 'r')
4862 temp = PyObject_Repr(v);
4863 if (temp == NULL)
4864 goto error;
4865 if (!PyString_Check(temp)) {
4866 PyErr_SetString(PyExc_TypeError,
4867 "%s argument has non-string str()");
4868 Py_DECREF(temp);
4869 goto error;
4870 }
4871 pbuf = PyString_AS_STRING(temp);
4872 len = PyString_GET_SIZE(temp);
4873 if (prec >= 0 && len > prec)
4874 len = prec;
4875 break;
4876 case 'i':
4877 case 'd':
4878 case 'u':
4879 case 'o':
4880 case 'x':
4881 case 'X':
4882 if (c == 'i')
4883 c = 'd';
4884 isnumok = 0;
4885 if (PyNumber_Check(v)) {
4886 PyObject *iobj=NULL;
4887
4888 if (PyInt_Check(v) || (PyLong_Check(v))) {
4889 iobj = v;
4890 Py_INCREF(iobj);
4891 }
4892 else {
4893 iobj = PyNumber_Int(v);
4894 if (iobj==NULL) iobj = PyNumber_Long(v);
4895 }
4896 if (iobj!=NULL) {
4897 if (PyInt_Check(iobj)) {
4898 isnumok = 1;
4899 pbuf = formatbuf;
4900 len = formatint(pbuf,
4901 sizeof(formatbuf),
4902 flags, prec, c, iobj);
4903 Py_DECREF(iobj);
4904 if (len < 0)
4905 goto error;
4906 sign = 1;
4907 }
4908 else if (PyLong_Check(iobj)) {
4909 int ilen;
4910
4911 isnumok = 1;
4912 temp = _PyString_FormatLong(iobj, flags,
4913 prec, c, &pbuf, &ilen);
4914 Py_DECREF(iobj);
4915 len = ilen;
4916 if (!temp)
4917 goto error;
4918 sign = 1;
4919 }
4920 else {
4921 Py_DECREF(iobj);
4922 }
4923 }
4924 }
4925 if (!isnumok) {
4926 PyErr_Format(PyExc_TypeError,
4927 "%%%c format: a number is required, "
4928 "not %.200s", c, Py_TYPE(v)->tp_name);
4929 goto error;
4930 }
4931 if (flags & F_ZERO)
4932 fill = '0';
4933 break;
4934 case 'e':
4935 case 'E':
4936 case 'f':
4937 case 'F':
4938 case 'g':
4939 case 'G':
4940 if (c == 'F')
4941 c = 'f';
4942 pbuf = formatbuf;
4943 len = formatfloat(pbuf, sizeof(formatbuf),
4944 flags, prec, c, v);
4945 if (len < 0)
4946 goto error;
4947 sign = 1;
4948 if (flags & F_ZERO)
4949 fill = '0';
4950 break;
4951 case 'c':
4952#ifdef Py_USING_UNICODE
4953 if (PyUnicode_Check(v)) {
4954 fmt = fmt_start;
4955 argidx = argidx_start;
4956 goto unicode;
4957 }
4958#endif
4959 pbuf = formatbuf;
4960 len = formatchar(pbuf, sizeof(formatbuf), v);
4961 if (len < 0)
4962 goto error;
4963 break;
4964 default:
4965 PyErr_Format(PyExc_ValueError,
4966 "unsupported format character '%c' (0x%x) "
4967 "at index %zd",
4968 c, c,
4969 (Py_ssize_t)(fmt - 1 -
4970 PyString_AsString(format)));
4971 goto error;
4972 }
4973 if (sign) {
4974 if (*pbuf == '-' || *pbuf == '+') {
4975 sign = *pbuf++;
4976 len--;
4977 }
4978 else if (flags & F_SIGN)
4979 sign = '+';
4980 else if (flags & F_BLANK)
4981 sign = ' ';
4982 else
4983 sign = 0;
4984 }
4985 if (width < len)
4986 width = len;
4987 if (rescnt - (sign != 0) < width) {
4988 reslen -= rescnt;
4989 rescnt = width + fmtcnt + 100;
4990 reslen += rescnt;
4991 if (reslen < 0) {
4992 Py_DECREF(result);
4993 Py_XDECREF(temp);
4994 return PyErr_NoMemory();
4995 }
4996 if (_PyString_Resize(&result, reslen) < 0) {
4997 Py_XDECREF(temp);
4998 return NULL;
4999 }
5000 res = PyString_AS_STRING(result)
5001 + reslen - rescnt;
5002 }
5003 if (sign) {
5004 if (fill != ' ')
5005 *res++ = sign;
5006 rescnt--;
5007 if (width > len)
5008 width--;
5009 }
5010 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5011 assert(pbuf[0] == '0');
5012 assert(pbuf[1] == c);
5013 if (fill != ' ') {
5014 *res++ = *pbuf++;
5015 *res++ = *pbuf++;
5016 }
5017 rescnt -= 2;
5018 width -= 2;
5019 if (width < 0)
5020 width = 0;
5021 len -= 2;
5022 }
5023 if (width > len && !(flags & F_LJUST)) {
5024 do {
5025 --rescnt;
5026 *res++ = fill;
5027 } while (--width > len);
5028 }
5029 if (fill == ' ') {
5030 if (sign)
5031 *res++ = sign;
5032 if ((flags & F_ALT) &&
5033 (c == 'x' || c == 'X')) {
5034 assert(pbuf[0] == '0');
5035 assert(pbuf[1] == c);
5036 *res++ = *pbuf++;
5037 *res++ = *pbuf++;
5038 }
5039 }
5040 Py_MEMCPY(res, pbuf, len);
5041 res += len;
5042 rescnt -= len;
5043 while (--width >= len) {
5044 --rescnt;
5045 *res++ = ' ';
5046 }
5047 if (dict && (argidx < arglen) && c != '%') {
5048 PyErr_SetString(PyExc_TypeError,
5049 "not all arguments converted during string formatting");
5050 Py_XDECREF(temp);
5051 goto error;
5052 }
5053 Py_XDECREF(temp);
5054 } /* '%' */
5055 } /* until end */
5056 if (argidx < arglen && !dict) {
5057 PyErr_SetString(PyExc_TypeError,
5058 "not all arguments converted during string formatting");
5059 goto error;
5060 }
5061 if (args_owned) {
5062 Py_DECREF(args);
5063 }
5064 _PyString_Resize(&result, reslen - rescnt);
5065 return result;
Christian Heimes44720832008-05-26 13:01:01 +00005066
5067#ifdef Py_USING_UNICODE
5068 unicode:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005069 if (args_owned) {
5070 Py_DECREF(args);
5071 args_owned = 0;
5072 }
5073 /* Fiddle args right (remove the first argidx arguments) */
5074 if (PyTuple_Check(orig_args) && argidx > 0) {
5075 PyObject *v;
5076 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5077 v = PyTuple_New(n);
5078 if (v == NULL)
5079 goto error;
5080 while (--n >= 0) {
5081 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5082 Py_INCREF(w);
5083 PyTuple_SET_ITEM(v, n, w);
5084 }
5085 args = v;
5086 } else {
5087 Py_INCREF(orig_args);
5088 args = orig_args;
5089 }
5090 args_owned = 1;
5091 /* Take what we have of the result and let the Unicode formatting
5092 function format the rest of the input. */
5093 rescnt = res - PyString_AS_STRING(result);
5094 if (_PyString_Resize(&result, rescnt))
5095 goto error;
5096 fmtcnt = PyString_GET_SIZE(format) - \
5097 (fmt - PyString_AS_STRING(format));
5098 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5099 if (format == NULL)
5100 goto error;
5101 v = PyUnicode_Format(format, args);
5102 Py_DECREF(format);
5103 if (v == NULL)
5104 goto error;
5105 /* Paste what we have (result) to what the Unicode formatting
5106 function returned (v) and return the result (or error) */
5107 w = PyUnicode_Concat(result, v);
5108 Py_DECREF(result);
5109 Py_DECREF(v);
5110 Py_DECREF(args);
5111 return w;
Christian Heimes44720832008-05-26 13:01:01 +00005112#endif /* Py_USING_UNICODE */
5113
5114 error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005115 Py_DECREF(result);
5116 if (args_owned) {
5117 Py_DECREF(args);
5118 }
5119 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00005120}
5121
5122void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005123PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005124{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005125 register PyStringObject *s = (PyStringObject *)(*p);
5126 PyObject *t;
5127 if (s == NULL || !PyString_Check(s))
5128 Py_FatalError("PyString_InternInPlace: strings only please!");
5129 /* If it's a string subclass, we don't really know what putting
5130 it in the interned dict might do. */
5131 if (!PyString_CheckExact(s))
5132 return;
5133 if (PyString_CHECK_INTERNED(s))
5134 return;
5135 if (interned == NULL) {
5136 interned = PyDict_New();
5137 if (interned == NULL) {
5138 PyErr_Clear(); /* Don't leave an exception */
5139 return;
5140 }
5141 }
5142 t = PyDict_GetItem(interned, (PyObject *)s);
5143 if (t) {
5144 Py_INCREF(t);
5145 Py_DECREF(*p);
5146 *p = t;
5147 return;
5148 }
Christian Heimes44720832008-05-26 13:01:01 +00005149
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005150 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5151 PyErr_Clear();
5152 return;
5153 }
5154 /* The two references in interned are not counted by refcnt.
5155 The string deallocator will take care of this */
5156 Py_REFCNT(s) -= 2;
5157 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005158}
5159
5160void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005161PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005162{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005163 PyString_InternInPlace(p);
5164 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5165 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5166 Py_INCREF(*p);
5167 }
Christian Heimes44720832008-05-26 13:01:01 +00005168}
5169
5170
5171PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005172PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005173{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005174 PyObject *s = PyString_FromString(cp);
5175 if (s == NULL)
5176 return NULL;
5177 PyString_InternInPlace(&s);
5178 return s;
Christian Heimes44720832008-05-26 13:01:01 +00005179}
5180
5181void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005182PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005183{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005184 int i;
5185 for (i = 0; i < UCHAR_MAX + 1; i++) {
5186 Py_XDECREF(characters[i]);
5187 characters[i] = NULL;
5188 }
5189 Py_XDECREF(nullstring);
5190 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00005191}
5192
5193void _Py_ReleaseInternedStrings(void)
5194{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005195 PyObject *keys;
5196 PyStringObject *s;
5197 Py_ssize_t i, n;
5198 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00005199
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005200 if (interned == NULL || !PyDict_Check(interned))
5201 return;
5202 keys = PyDict_Keys(interned);
5203 if (keys == NULL || !PyList_Check(keys)) {
5204 PyErr_Clear();
5205 return;
5206 }
Christian Heimes44720832008-05-26 13:01:01 +00005207
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005208 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5209 detector, interned strings are not forcibly deallocated; rather, we
5210 give them their stolen references back, and then clear and DECREF
5211 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00005212
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005213 n = PyList_GET_SIZE(keys);
5214 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5215 n);
5216 for (i = 0; i < n; i++) {
5217 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5218 switch (s->ob_sstate) {
5219 case SSTATE_NOT_INTERNED:
5220 /* XXX Shouldn't happen */
5221 break;
5222 case SSTATE_INTERNED_IMMORTAL:
5223 Py_REFCNT(s) += 1;
5224 immortal_size += Py_SIZE(s);
5225 break;
5226 case SSTATE_INTERNED_MORTAL:
5227 Py_REFCNT(s) += 2;
5228 mortal_size += Py_SIZE(s);
5229 break;
5230 default:
5231 Py_FatalError("Inconsistent interned string state.");
5232 }
5233 s->ob_sstate = SSTATE_NOT_INTERNED;
5234 }
5235 fprintf(stderr, "total size of all interned strings: "
5236 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5237 "mortal/immortal\n", mortal_size, immortal_size);
5238 Py_DECREF(keys);
5239 PyDict_Clear(interned);
5240 Py_DECREF(interned);
5241 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005242}