blob: 9e2673d4fa20f9937ff16bd2b91ac87645deddd5 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
7
8#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000012static PyStringObject *characters[UCHAR_MAX + 1];
13static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000014
/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
23static PyObject *interned;
24
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.
   It is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
51PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000053{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000054 register PyStringObject *op;
55 if (size < 0) {
56 PyErr_SetString(PyExc_SystemError,
57 "Negative size passed to PyString_FromStringAndSize");
58 return NULL;
59 }
60 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000061#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000062 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000063#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000064 Py_INCREF(op);
65 return (PyObject *)op;
66 }
67 if (size == 1 && str != NULL &&
68 (op = characters[*str & UCHAR_MAX]) != NULL)
69 {
Christian Heimes44720832008-05-26 13:01:01 +000070#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000071 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000072#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000073 Py_INCREF(op);
74 return (PyObject *)op;
75 }
Christian Heimes44720832008-05-26 13:01:01 +000076
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000077 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
78 PyErr_SetString(PyExc_OverflowError, "string is too large");
79 return NULL;
80 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000081
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000082 /* Inline PyObject_NewVar */
83 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
84 if (op == NULL)
85 return PyErr_NoMemory();
86 PyObject_INIT_VAR(op, &PyString_Type, size);
87 op->ob_shash = -1;
88 op->ob_sstate = SSTATE_NOT_INTERNED;
89 if (str != NULL)
90 Py_MEMCPY(op->ob_sval, str, size);
91 op->ob_sval[size] = '\0';
92 /* share short strings */
93 if (size == 0) {
94 PyObject *t = (PyObject *)op;
95 PyString_InternInPlace(&t);
96 op = (PyStringObject *)t;
97 nullstring = op;
98 Py_INCREF(op);
99 } else if (size == 1 && str != NULL) {
100 PyObject *t = (PyObject *)op;
101 PyString_InternInPlace(&t);
102 op = (PyStringObject *)t;
103 characters[*str & UCHAR_MAX] = op;
104 Py_INCREF(op);
105 }
106 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000107}
108
Christian Heimes44720832008-05-26 13:01:01 +0000109PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000110PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000111{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000112 register size_t size;
113 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000114
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000115 assert(str != NULL);
116 size = strlen(str);
117 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
118 PyErr_SetString(PyExc_OverflowError,
119 "string is too long for a Python string");
120 return NULL;
121 }
122 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000123#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000124 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000125#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
128 }
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000130#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000131 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000132#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000133 Py_INCREF(op);
134 return (PyObject *)op;
135 }
Christian Heimes44720832008-05-26 13:01:01 +0000136
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000137 /* Inline PyObject_NewVar */
138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
139 if (op == NULL)
140 return PyErr_NoMemory();
141 PyObject_INIT_VAR(op, &PyString_Type, size);
142 op->ob_shash = -1;
143 op->ob_sstate = SSTATE_NOT_INTERNED;
144 Py_MEMCPY(op->ob_sval, str, size+1);
145 /* share short strings */
146 if (size == 0) {
147 PyObject *t = (PyObject *)op;
148 PyString_InternInPlace(&t);
149 op = (PyStringObject *)t;
150 nullstring = op;
151 Py_INCREF(op);
152 } else if (size == 1) {
153 PyObject *t = (PyObject *)op;
154 PyString_InternInPlace(&t);
155 op = (PyStringObject *)t;
156 characters[*str & UCHAR_MAX] = op;
157 Py_INCREF(op);
158 }
159 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000160}
161
Christian Heimes44720832008-05-26 13:01:01 +0000162PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000163PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000164{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000165 va_list count;
166 Py_ssize_t n = 0;
167 const char* f;
168 char *s;
169 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000170
Christian Heimes44720832008-05-26 13:01:01 +0000171#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000172 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000173#else
174#ifdef __va_copy
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000175 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000176#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000177 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000178#endif
179#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000180 /* step 1: figure out how large a buffer we need */
181 for (f = format; *f; f++) {
182 if (*f == '%') {
183 const char* p = f;
184 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
185 ;
Christian Heimes44720832008-05-26 13:01:01 +0000186
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
188 * they don't affect the amount of space we reserve.
189 */
190 if ((*f == 'l' || *f == 'z') &&
191 (f[1] == 'd' || f[1] == 'u'))
192 ++f;
Christian Heimes44720832008-05-26 13:01:01 +0000193
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000194 switch (*f) {
195 case 'c':
196 (void)va_arg(count, int);
197 /* fall through... */
198 case '%':
199 n++;
200 break;
201 case 'd': case 'u': case 'i': case 'x':
202 (void) va_arg(count, int);
203 /* 20 bytes is enough to hold a 64-bit
204 integer. Decimal takes the most space.
205 This isn't enough for octal. */
206 n += 20;
207 break;
208 case 's':
209 s = va_arg(count, char*);
210 n += strlen(s);
211 break;
212 case 'p':
213 (void) va_arg(count, int);
214 /* maximum 64-bit pointer representation:
215 * 0xffffffffffffffff
216 * so 19 characters is enough.
217 * XXX I count 18 -- what's the extra for?
218 */
219 n += 19;
220 break;
221 default:
222 /* if we stumble upon an unknown
223 formatting code, copy the rest of
224 the format string to the output
225 string. (we cannot just skip the
226 code, since there's no way to know
227 what's in the argument list) */
228 n += strlen(p);
229 goto expand;
230 }
231 } else
232 n++;
233 }
Christian Heimes44720832008-05-26 13:01:01 +0000234 expand:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000235 /* step 2: fill the buffer */
236 /* Since we've analyzed how much space we need for the worst case,
237 use sprintf directly instead of the slower PyOS_snprintf. */
238 string = PyString_FromStringAndSize(NULL, n);
239 if (!string)
240 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000241
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000242 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000243
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000244 for (f = format; *f; f++) {
245 if (*f == '%') {
246 const char* p = f++;
247 Py_ssize_t i;
248 int longflag = 0;
249 int size_tflag = 0;
250 /* parse the width.precision part (we're only
251 interested in the precision value, if any) */
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 if (*f == '.') {
256 f++;
257 n = 0;
258 while (isdigit(Py_CHARMASK(*f)))
259 n = (n*10) + *f++ - '0';
260 }
261 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
262 f++;
263 /* handle the long flag, but only for %ld and %lu.
264 others can be added when necessary. */
265 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
266 longflag = 1;
267 ++f;
268 }
269 /* handle the size_t flag. */
270 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
271 size_tflag = 1;
272 ++f;
273 }
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000275 switch (*f) {
276 case 'c':
277 *s++ = va_arg(vargs, int);
278 break;
279 case 'd':
280 if (longflag)
281 sprintf(s, "%ld", va_arg(vargs, long));
282 else if (size_tflag)
283 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
284 va_arg(vargs, Py_ssize_t));
285 else
286 sprintf(s, "%d", va_arg(vargs, int));
287 s += strlen(s);
288 break;
289 case 'u':
290 if (longflag)
291 sprintf(s, "%lu",
292 va_arg(vargs, unsigned long));
293 else if (size_tflag)
294 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
295 va_arg(vargs, size_t));
296 else
297 sprintf(s, "%u",
298 va_arg(vargs, unsigned int));
299 s += strlen(s);
300 break;
301 case 'i':
302 sprintf(s, "%i", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 'x':
306 sprintf(s, "%x", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 's':
310 p = va_arg(vargs, char*);
311 i = strlen(p);
312 if (n > 0 && i > n)
313 i = n;
314 Py_MEMCPY(s, p, i);
315 s += i;
316 break;
317 case 'p':
318 sprintf(s, "%p", va_arg(vargs, void*));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (s[1] == 'X')
321 s[1] = 'x';
322 else if (s[1] != 'x') {
323 memmove(s+2, s, strlen(s)+1);
324 s[0] = '0';
325 s[1] = 'x';
326 }
327 s += strlen(s);
328 break;
329 case '%':
330 *s++ = '%';
331 break;
332 default:
333 strcpy(s, p);
334 s += strlen(s);
335 goto end;
336 }
337 } else
338 *s++ = *f;
339 }
Christian Heimes44720832008-05-26 13:01:01 +0000340
341 end:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000342 _PyString_Resize(&string, s - PyString_AS_STRING(string));
343 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000344}
345
346PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000347PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000348{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000349 PyObject* ret;
350 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000351
352#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000353 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000354#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000355 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000356#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000357 ret = PyString_FromFormatV(format, vargs);
358 va_end(vargs);
359 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000360}
361
362
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000363PyObject *PyString_Decode(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000364 Py_ssize_t size,
365 const char *encoding,
366 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000367{
368 PyObject *v, *str;
369
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000370 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000371 if (str == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000372 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000373 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000374 Py_DECREF(str);
375 return v;
376}
377
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000378PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000379 const char *encoding,
380 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000381{
382 PyObject *v;
383
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000384 if (!PyString_Check(str)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000385 PyErr_BadArgument();
386 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000387 }
388
Christian Heimes44720832008-05-26 13:01:01 +0000389 if (encoding == NULL) {
390#ifdef Py_USING_UNICODE
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000391 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000392#else
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000393 PyErr_SetString(PyExc_ValueError, "no encoding specified");
394 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000395#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000396 }
Christian Heimes44720832008-05-26 13:01:01 +0000397
398 /* Decode via the codec registry */
399 v = PyCodec_Decode(str, encoding, errors);
400 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000401 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000402
403 return v;
404
405 onError:
406 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000407}
408
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000409PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000410 const char *encoding,
411 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000412{
Christian Heimes44720832008-05-26 13:01:01 +0000413 PyObject *v;
414
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000415 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000416 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000417 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000418
419#ifdef Py_USING_UNICODE
420 /* Convert Unicode to a string using the default encoding */
421 if (PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000422 PyObject *temp = v;
423 v = PyUnicode_AsEncodedString(v, NULL, NULL);
424 Py_DECREF(temp);
425 if (v == NULL)
426 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000427 }
Christian Heimes44720832008-05-26 13:01:01 +0000428#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000429 if (!PyString_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000430 PyErr_Format(PyExc_TypeError,
431 "decoder did not return a string object (type=%.400s)",
432 Py_TYPE(v)->tp_name);
433 Py_DECREF(v);
434 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000435 }
Christian Heimes44720832008-05-26 13:01:01 +0000436
437 return v;
438
439 onError:
440 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000441}
442
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000443PyObject *PyString_Encode(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000444 Py_ssize_t size,
445 const char *encoding,
446 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000447{
Christian Heimes44720832008-05-26 13:01:01 +0000448 PyObject *v, *str;
449
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000450 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000451 if (str == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000452 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000453 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000454 Py_DECREF(str);
455 return v;
456}
457
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000458PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000459 const char *encoding,
460 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000461{
462 PyObject *v;
463
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000464 if (!PyString_Check(str)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000465 PyErr_BadArgument();
466 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000467 }
468
469 if (encoding == NULL) {
470#ifdef Py_USING_UNICODE
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000471 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000472#else
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000473 PyErr_SetString(PyExc_ValueError, "no encoding specified");
474 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000475#endif
476 }
477
478 /* Encode via the codec registry */
479 v = PyCodec_Encode(str, encoding, errors);
480 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000481 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000482
483 return v;
484
485 onError:
486 return NULL;
487}
488
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000489PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000490 const char *encoding,
491 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000492{
493 PyObject *v;
494
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000495 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000496 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000497 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000498
499#ifdef Py_USING_UNICODE
500 /* Convert Unicode to a string using the default encoding */
501 if (PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000502 PyObject *temp = v;
503 v = PyUnicode_AsEncodedString(v, NULL, NULL);
504 Py_DECREF(temp);
505 if (v == NULL)
506 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000507 }
508#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000509 if (!PyString_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000510 PyErr_Format(PyExc_TypeError,
511 "encoder did not return a string object (type=%.400s)",
512 Py_TYPE(v)->tp_name);
513 Py_DECREF(v);
514 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000515 }
516
517 return v;
518
519 onError:
520 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000521}
522
523static void
Christian Heimes44720832008-05-26 13:01:01 +0000524string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000525{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000526 switch (PyString_CHECK_INTERNED(op)) {
527 case SSTATE_NOT_INTERNED:
528 break;
Christian Heimes44720832008-05-26 13:01:01 +0000529
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000530 case SSTATE_INTERNED_MORTAL:
531 /* revive dead object temporarily for DelItem */
532 Py_REFCNT(op) = 3;
533 if (PyDict_DelItem(interned, op) != 0)
534 Py_FatalError(
535 "deletion of interned string failed");
536 break;
Christian Heimes44720832008-05-26 13:01:01 +0000537
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000538 case SSTATE_INTERNED_IMMORTAL:
539 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000540
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000541 default:
542 Py_FatalError("Inconsistent interned string state.");
543 }
544 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000545}
546
Christian Heimes44720832008-05-26 13:01:01 +0000547/* Unescape a backslash-escaped string. If unicode is non-zero,
548 the string is a u-literal. If recode_encoding is non-zero,
549 the string is UTF-8 encoded and should be re-encoded in the
550 specified encoding. */
551
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000552PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000553 Py_ssize_t len,
554 const char *errors,
555 Py_ssize_t unicode,
556 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000557{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000558 int c;
559 char *p, *buf;
560 const char *end;
561 PyObject *v;
562 Py_ssize_t newlen = recode_encoding ? 4*len:len;
563 v = PyString_FromStringAndSize((char *)NULL, newlen);
564 if (v == NULL)
565 return NULL;
566 p = buf = PyString_AsString(v);
567 end = s + len;
568 while (s < end) {
569 if (*s != '\\') {
570 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000571#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000572 if (recode_encoding && (*s & 0x80)) {
573 PyObject *u, *w;
574 char *r;
575 const char* t;
576 Py_ssize_t rn;
577 t = s;
578 /* Decode non-ASCII bytes as UTF-8. */
579 while (t < end && (*t & 0x80)) t++;
580 u = PyUnicode_DecodeUTF8(s, t - s, errors);
581 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000583 /* Recode them in target encoding. */
584 w = PyUnicode_AsEncodedString(
585 u, recode_encoding, errors);
586 Py_DECREF(u);
587 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000588
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000589 /* Append bytes to output buffer. */
590 assert(PyString_Check(w));
591 r = PyString_AS_STRING(w);
592 rn = PyString_GET_SIZE(w);
593 Py_MEMCPY(p, r, rn);
594 p += rn;
595 Py_DECREF(w);
596 s = t;
597 } else {
598 *p++ = *s++;
599 }
Christian Heimes44720832008-05-26 13:01:01 +0000600#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000601 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000602#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000603 continue;
604 }
605 s++;
606 if (s==end) {
607 PyErr_SetString(PyExc_ValueError,
608 "Trailing \\ in string");
609 goto failed;
610 }
611 switch (*s++) {
612 /* XXX This assumes ASCII! */
613 case '\n': break;
614 case '\\': *p++ = '\\'; break;
615 case '\'': *p++ = '\''; break;
616 case '\"': *p++ = '\"'; break;
617 case 'b': *p++ = '\b'; break;
618 case 'f': *p++ = '\014'; break; /* FF */
619 case 't': *p++ = '\t'; break;
620 case 'n': *p++ = '\n'; break;
621 case 'r': *p++ = '\r'; break;
622 case 'v': *p++ = '\013'; break; /* VT */
623 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
624 case '0': case '1': case '2': case '3':
625 case '4': case '5': case '6': case '7':
626 c = s[-1] - '0';
627 if (s < end && '0' <= *s && *s <= '7') {
628 c = (c<<3) + *s++ - '0';
629 if (s < end && '0' <= *s && *s <= '7')
630 c = (c<<3) + *s++ - '0';
631 }
632 *p++ = c;
633 break;
634 case 'x':
635 if (s+1 < end &&
636 isxdigit(Py_CHARMASK(s[0])) &&
637 isxdigit(Py_CHARMASK(s[1])))
638 {
639 unsigned int x = 0;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x = c - '0';
644 else if (islower(c))
645 x = 10 + c - 'a';
646 else
647 x = 10 + c - 'A';
648 x = x << 4;
649 c = Py_CHARMASK(*s);
650 s++;
651 if (isdigit(c))
652 x += c - '0';
653 else if (islower(c))
654 x += 10 + c - 'a';
655 else
656 x += 10 + c - 'A';
657 *p++ = x;
658 break;
659 }
660 if (!errors || strcmp(errors, "strict") == 0) {
661 PyErr_SetString(PyExc_ValueError,
662 "invalid \\x escape");
663 goto failed;
664 }
665 if (strcmp(errors, "replace") == 0) {
666 *p++ = '?';
667 } else if (strcmp(errors, "ignore") == 0)
668 /* do nothing */;
669 else {
670 PyErr_Format(PyExc_ValueError,
671 "decoding error; "
672 "unknown error handling code: %.400s",
673 errors);
674 goto failed;
675 }
Christian Heimes44720832008-05-26 13:01:01 +0000676#ifndef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000677 case 'u':
678 case 'U':
679 case 'N':
680 if (unicode) {
681 PyErr_SetString(PyExc_ValueError,
682 "Unicode escapes not legal "
683 "when Unicode disabled");
684 goto failed;
685 }
Christian Heimes44720832008-05-26 13:01:01 +0000686#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000687 default:
688 *p++ = '\\';
689 s--;
690 goto non_esc; /* an arbitry number of unescaped
691 UTF-8 bytes may follow. */
692 }
693 }
694 if (p-buf < newlen)
695 _PyString_Resize(&v, p - buf);
696 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000697 failed:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000698 Py_DECREF(v);
699 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000700}
701
702/* -------------------------------------------------------------------- */
703/* object api */
704
Christian Heimes1a6387e2008-03-26 12:49:49 +0000705static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000706string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000707{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000708 char *s;
709 Py_ssize_t len;
710 if (PyString_AsStringAndSize(op, &s, &len))
711 return -1;
712 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000713}
714
Christian Heimes44720832008-05-26 13:01:01 +0000715static /*const*/ char *
716string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000717{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000718 char *s;
719 Py_ssize_t len;
720 if (PyString_AsStringAndSize(op, &s, &len))
721 return NULL;
722 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000723}
724
725Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000726PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000727{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000728 if (!PyString_Check(op))
729 return string_getsize(op);
730 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000731}
732
Christian Heimes44720832008-05-26 13:01:01 +0000733/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000734PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000735{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000736 if (!PyString_Check(op))
737 return string_getbuffer(op);
738 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000739}
740
741int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000742PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000743 register char **s,
744 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000745{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000746 if (s == NULL) {
747 PyErr_BadInternalCall();
748 return -1;
749 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000750
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000751 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000752#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000753 if (PyUnicode_Check(obj)) {
754 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
755 if (obj == NULL)
756 return -1;
757 }
758 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000759#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000760 {
761 PyErr_Format(PyExc_TypeError,
762 "expected string or Unicode object, "
763 "%.200s found", Py_TYPE(obj)->tp_name);
764 return -1;
765 }
766 }
Christian Heimes44720832008-05-26 13:01:01 +0000767
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000768 *s = PyString_AS_STRING(obj);
769 if (len != NULL)
770 *len = PyString_GET_SIZE(obj);
771 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
772 PyErr_SetString(PyExc_TypeError,
773 "expected string without null bytes");
774 return -1;
775 }
776 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000777}
778
Christian Heimes1a6387e2008-03-26 12:49:49 +0000779/* -------------------------------------------------------------------- */
780/* Methods */
781
Christian Heimes44720832008-05-26 13:01:01 +0000782#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000784
Christian Heimes1a6387e2008-03-26 12:49:49 +0000785#include "stringlib/count.h"
786#include "stringlib/find.h"
787#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000789#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000790#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792
793
794static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000797 Py_ssize_t i, str_len;
798 char c;
799 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000800
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000801 /* XXX Ought to check for interrupts when writing long strings */
802 if (! PyString_CheckExact(op)) {
803 int ret;
804 /* A str subclass may have its own __str__ method. */
805 op = (PyStringObject *) PyObject_Str((PyObject *)op);
806 if (op == NULL)
807 return -1;
808 ret = string_print(op, fp, flags);
809 Py_DECREF(op);
810 return ret;
811 }
812 if (flags & Py_PRINT_RAW) {
813 char *data = op->ob_sval;
814 Py_ssize_t size = Py_SIZE(op);
815 Py_BEGIN_ALLOW_THREADS
816 while (size > INT_MAX) {
817 /* Very long strings cannot be written atomically.
818 * But don't write exactly INT_MAX bytes at a time
819 * to avoid memory aligment issues.
820 */
821 const int chunk_size = INT_MAX & ~0x3FFF;
822 fwrite(data, 1, chunk_size, fp);
823 data += chunk_size;
824 size -= chunk_size;
825 }
Christian Heimes44720832008-05-26 13:01:01 +0000826#ifdef __VMS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000827 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000828#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000829 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000830#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000831 Py_END_ALLOW_THREADS
832 return 0;
833 }
Christian Heimes44720832008-05-26 13:01:01 +0000834
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000835 /* figure out which quote to use; single is preferred */
836 quote = '\'';
837 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
838 !memchr(op->ob_sval, '"', Py_SIZE(op)))
839 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000840
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000841 str_len = Py_SIZE(op);
842 Py_BEGIN_ALLOW_THREADS
843 fputc(quote, fp);
844 for (i = 0; i < str_len; i++) {
845 /* Since strings are immutable and the caller should have a
846 reference, accessing the interal buffer should not be an issue
847 with the GIL released. */
848 c = op->ob_sval[i];
849 if (c == quote || c == '\\')
850 fprintf(fp, "\\%c", c);
851 else if (c == '\t')
852 fprintf(fp, "\\t");
853 else if (c == '\n')
854 fprintf(fp, "\\n");
855 else if (c == '\r')
856 fprintf(fp, "\\r");
857 else if (c < ' ' || c >= 0x7f)
858 fprintf(fp, "\\x%02x", c & 0xff);
859 else
860 fputc(c, fp);
861 }
862 fputc(quote, fp);
863 Py_END_ALLOW_THREADS
864 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000865}
866
Christian Heimes44720832008-05-26 13:01:01 +0000867PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000868PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000869{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000870 register PyStringObject* op = (PyStringObject*) obj;
871 size_t newsize = 2 + 4 * Py_SIZE(op);
872 PyObject *v;
873 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
874 PyErr_SetString(PyExc_OverflowError,
875 "string is too large to make repr");
876 return NULL;
877 }
878 v = PyString_FromStringAndSize((char *)NULL, newsize);
879 if (v == NULL) {
880 return NULL;
881 }
882 else {
883 register Py_ssize_t i;
884 register char c;
885 register char *p;
886 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000887
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000888 /* figure out which quote to use; single is preferred */
889 quote = '\'';
890 if (smartquotes &&
891 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000895 p = PyString_AS_STRING(v);
896 *p++ = quote;
897 for (i = 0; i < Py_SIZE(op); i++) {
898 /* There's at least enough room for a hex escape
899 and a closing quote. */
900 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
901 c = op->ob_sval[i];
902 if (c == quote || c == '\\')
903 *p++ = '\\', *p++ = c;
904 else if (c == '\t')
905 *p++ = '\\', *p++ = 't';
906 else if (c == '\n')
907 *p++ = '\\', *p++ = 'n';
908 else if (c == '\r')
909 *p++ = '\\', *p++ = 'r';
910 else if (c < ' ' || c >= 0x7f) {
911 /* For performance, we don't want to call
912 PyOS_snprintf here (extra layers of
913 function call). */
914 sprintf(p, "\\x%02x", c & 0xff);
915 p += 4;
916 }
917 else
918 *p++ = c;
919 }
920 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
921 *p++ = quote;
922 *p = '\0';
923 _PyString_Resize(
924 &v, (p - PyString_AS_STRING(v)));
925 return v;
926 }
Christian Heimes44720832008-05-26 13:01:01 +0000927}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000928
929static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000930string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000931{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000932 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000933}
934
Christian Heimes1a6387e2008-03-26 12:49:49 +0000935static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000936string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000937{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000938 assert(PyString_Check(s));
939 if (PyString_CheckExact(s)) {
940 Py_INCREF(s);
941 return s;
942 }
943 else {
944 /* Subtype -- return genuine string with the same value. */
945 PyStringObject *t = (PyStringObject *) s;
946 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
947 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000948}
949
Christian Heimes44720832008-05-26 13:01:01 +0000950static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000951string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000952{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000953 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +0000954}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000955
Christian Heimes44720832008-05-26 13:01:01 +0000956static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000957string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000958{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000959 register Py_ssize_t size;
960 register PyStringObject *op;
961 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000962#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000963 if (PyUnicode_Check(bb))
964 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +0000965#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000966 if (PyByteArray_Check(bb))
967 return PyByteArray_Concat((PyObject *)a, bb);
968 PyErr_Format(PyExc_TypeError,
969 "cannot concatenate 'str' and '%.200s' objects",
970 Py_TYPE(bb)->tp_name);
971 return NULL;
972 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000973#define b ((PyStringObject *)bb)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000974 /* Optimize cases with empty left or right operand */
975 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
976 PyString_CheckExact(a) && PyString_CheckExact(b)) {
977 if (Py_SIZE(a) == 0) {
978 Py_INCREF(bb);
979 return bb;
980 }
981 Py_INCREF(a);
982 return (PyObject *)a;
983 }
984 size = Py_SIZE(a) + Py_SIZE(b);
985 /* Check that string sizes are not negative, to prevent an
986 overflow in cases where we are passed incorrectly-created
987 strings with negative lengths (due to a bug in other code).
988 */
989 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
990 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
991 PyErr_SetString(PyExc_OverflowError,
992 "strings are too large to concat");
993 return NULL;
994 }
995
996 /* Inline PyObject_NewVar */
997 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
998 PyErr_SetString(PyExc_OverflowError,
999 "strings are too large to concat");
1000 return NULL;
1001 }
1002 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
1003 if (op == NULL)
1004 return PyErr_NoMemory();
1005 PyObject_INIT_VAR(op, &PyString_Type, size);
1006 op->ob_shash = -1;
1007 op->ob_sstate = SSTATE_NOT_INTERNED;
1008 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1009 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1010 op->ob_sval[size] = '\0';
1011 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001012#undef b
1013}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001014
Christian Heimes44720832008-05-26 13:01:01 +00001015static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001016string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001017{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001018 register Py_ssize_t i;
1019 register Py_ssize_t j;
1020 register Py_ssize_t size;
1021 register PyStringObject *op;
1022 size_t nbytes;
1023 if (n < 0)
1024 n = 0;
1025 /* watch out for overflows: the size can overflow int,
1026 * and the # of bytes needed can overflow size_t
1027 */
1028 size = Py_SIZE(a) * n;
1029 if (n && size / n != Py_SIZE(a)) {
1030 PyErr_SetString(PyExc_OverflowError,
1031 "repeated string is too long");
1032 return NULL;
1033 }
1034 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 nbytes = (size_t)size;
1039 if (nbytes + sizeof(PyStringObject) <= nbytes) {
1040 PyErr_SetString(PyExc_OverflowError,
1041 "repeated string is too long");
1042 return NULL;
1043 }
1044 op = (PyStringObject *)
1045 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1046 if (op == NULL)
1047 return PyErr_NoMemory();
1048 PyObject_INIT_VAR(op, &PyString_Type, size);
1049 op->ob_shash = -1;
1050 op->ob_sstate = SSTATE_NOT_INTERNED;
1051 op->ob_sval[size] = '\0';
1052 if (Py_SIZE(a) == 1 && n > 0) {
1053 memset(op->ob_sval, a->ob_sval[0] , n);
1054 return (PyObject *) op;
1055 }
1056 i = 0;
1057 if (i < size) {
1058 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1059 i = Py_SIZE(a);
1060 }
1061 while (i < size) {
1062 j = (i <= size-i) ? i : size-i;
1063 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1064 i += j;
1065 }
1066 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1070
1071static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001072string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001073 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001074 /* j -- may be negative! */
1075{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001076 if (i < 0)
1077 i = 0;
1078 if (j < 0)
1079 j = 0; /* Avoid signed/unsigned bug in next line */
1080 if (j > Py_SIZE(a))
1081 j = Py_SIZE(a);
1082 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1083 /* It's the same as a */
1084 Py_INCREF(a);
1085 return (PyObject *)a;
1086 }
1087 if (j < i)
1088 j = i;
1089 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001090}
1091
1092static int
1093string_contains(PyObject *str_obj, PyObject *sub_obj)
1094{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001095 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001096#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001097 if (PyUnicode_Check(sub_obj))
1098 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001099#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001100 if (!PyString_Check(sub_obj)) {
1101 PyErr_Format(PyExc_TypeError,
1102 "'in <string>' requires string as left operand, "
1103 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1104 return -1;
1105 }
1106 }
Christian Heimes44720832008-05-26 13:01:01 +00001107
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001108 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001109}
1110
1111static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001112string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001113{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001114 char pchar;
1115 PyObject *v;
1116 if (i < 0 || i >= Py_SIZE(a)) {
1117 PyErr_SetString(PyExc_IndexError, "string index out of range");
1118 return NULL;
1119 }
1120 pchar = a->ob_sval[i];
1121 v = (PyObject *)characters[pchar & UCHAR_MAX];
1122 if (v == NULL)
1123 v = PyString_FromStringAndSize(&pchar, 1);
1124 else {
Christian Heimes44720832008-05-26 13:01:01 +00001125#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001126 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001127#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001128 Py_INCREF(v);
1129 }
1130 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001131}
1132
1133static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001134string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001135{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001136 int c;
1137 Py_ssize_t len_a, len_b;
1138 Py_ssize_t min_len;
1139 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001140
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001141 /* Make sure both arguments are strings. */
1142 if (!(PyString_Check(a) && PyString_Check(b))) {
1143 result = Py_NotImplemented;
1144 goto out;
1145 }
1146 if (a == b) {
1147 switch (op) {
1148 case Py_EQ:case Py_LE:case Py_GE:
1149 result = Py_True;
1150 goto out;
1151 case Py_NE:case Py_LT:case Py_GT:
1152 result = Py_False;
1153 goto out;
1154 }
1155 }
1156 if (op == Py_EQ) {
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (Py_SIZE(a) == Py_SIZE(b)
1160 && (a->ob_sval[0] == b->ob_sval[0]
1161 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1162 result = Py_True;
1163 } else {
1164 result = Py_False;
1165 }
1166 goto out;
1167 }
1168 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1169 min_len = (len_a < len_b) ? len_a : len_b;
1170 if (min_len > 0) {
1171 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1172 if (c==0)
1173 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1174 } else
1175 c = 0;
1176 if (c == 0)
1177 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1178 switch (op) {
1179 case Py_LT: c = c < 0; break;
1180 case Py_LE: c = c <= 0; break;
1181 case Py_EQ: assert(0); break; /* unreachable */
1182 case Py_NE: c = c != 0; break;
1183 case Py_GT: c = c > 0; break;
1184 case Py_GE: c = c >= 0; break;
1185 default:
1186 result = Py_NotImplemented;
1187 goto out;
1188 }
1189 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001190 out:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001191 Py_INCREF(result);
1192 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001193}
1194
1195int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001196_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001197{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001198 PyStringObject *a = (PyStringObject*) o1;
1199 PyStringObject *b = (PyStringObject*) o2;
1200 return Py_SIZE(a) == Py_SIZE(b)
1201 && *a->ob_sval == *b->ob_sval
1202 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001203}
1204
1205static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001206string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001207{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001208 register Py_ssize_t len;
1209 register unsigned char *p;
1210 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001211
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001212 if (a->ob_shash != -1)
1213 return a->ob_shash;
1214 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001215 /*
1216 We make the hash of the empty string be 0, rather than using
1217 (prefix ^ suffix), since this slightly obfuscates the hash secret
1218 */
1219 if (len == 0) {
1220 a->ob_shash = 0;
1221 return 0;
1222 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001223 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001224 x = _Py_HashSecret.prefix;
1225 x ^= *p << 7;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001226 while (--len >= 0)
1227 x = (1000003*x) ^ *p++;
1228 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001229 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001230 if (x == -1)
1231 x = -2;
1232 a->ob_shash = x;
1233 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001234}
1235
1236static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001237string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001238{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001239 if (PyIndex_Check(item)) {
1240 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1241 if (i == -1 && PyErr_Occurred())
1242 return NULL;
1243 if (i < 0)
1244 i += PyString_GET_SIZE(self);
1245 return string_item(self, i);
1246 }
1247 else if (PySlice_Check(item)) {
1248 Py_ssize_t start, stop, step, slicelength, cur, i;
1249 char* source_buf;
1250 char* result_buf;
1251 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001252
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001253 if (PySlice_GetIndicesEx((PySliceObject*)item,
1254 PyString_GET_SIZE(self),
1255 &start, &stop, &step, &slicelength) < 0) {
1256 return NULL;
1257 }
Christian Heimes44720832008-05-26 13:01:01 +00001258
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001259 if (slicelength <= 0) {
1260 return PyString_FromStringAndSize("", 0);
1261 }
1262 else if (start == 0 && step == 1 &&
1263 slicelength == PyString_GET_SIZE(self) &&
1264 PyString_CheckExact(self)) {
1265 Py_INCREF(self);
1266 return (PyObject *)self;
1267 }
1268 else if (step == 1) {
1269 return PyString_FromStringAndSize(
1270 PyString_AS_STRING(self) + start,
1271 slicelength);
1272 }
1273 else {
1274 source_buf = PyString_AsString((PyObject*)self);
1275 result_buf = (char *)PyMem_Malloc(slicelength);
1276 if (result_buf == NULL)
1277 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001278
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001279 for (cur = start, i = 0; i < slicelength;
1280 cur += step, i++) {
1281 result_buf[i] = source_buf[cur];
1282 }
Christian Heimes44720832008-05-26 13:01:01 +00001283
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001284 result = PyString_FromStringAndSize(result_buf,
1285 slicelength);
1286 PyMem_Free(result_buf);
1287 return result;
1288 }
1289 }
1290 else {
1291 PyErr_Format(PyExc_TypeError,
1292 "string indices must be integers, not %.200s",
1293 Py_TYPE(item)->tp_name);
1294 return NULL;
1295 }
Christian Heimes44720832008-05-26 13:01:01 +00001296}
1297
1298static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001299string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001300{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001301 if ( index != 0 ) {
1302 PyErr_SetString(PyExc_SystemError,
1303 "accessing non-existent string segment");
1304 return -1;
1305 }
1306 *ptr = (void *)self->ob_sval;
1307 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001308}
1309
1310static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001311string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001312{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001313 PyErr_SetString(PyExc_TypeError,
1314 "Cannot use string as modifiable buffer");
1315 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001316}
1317
1318static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001319string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001320{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001321 if ( lenp )
1322 *lenp = Py_SIZE(self);
1323 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001324}
1325
1326static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001327string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001328{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001329 if ( index != 0 ) {
1330 PyErr_SetString(PyExc_SystemError,
1331 "accessing non-existent string segment");
1332 return -1;
1333 }
1334 *ptr = self->ob_sval;
1335 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001336}
1337
1338static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001339string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001340{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001341 return PyBuffer_FillInfo(view, (PyObject*)self,
1342 (void *)self->ob_sval, Py_SIZE(self),
1343 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001344}
1345
1346static PySequenceMethods string_as_sequence = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001347 (lenfunc)string_length, /*sq_length*/
1348 (binaryfunc)string_concat, /*sq_concat*/
1349 (ssizeargfunc)string_repeat, /*sq_repeat*/
1350 (ssizeargfunc)string_item, /*sq_item*/
1351 (ssizessizeargfunc)string_slice, /*sq_slice*/
1352 0, /*sq_ass_item*/
1353 0, /*sq_ass_slice*/
1354 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001355};
1356
1357static PyMappingMethods string_as_mapping = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001358 (lenfunc)string_length,
1359 (binaryfunc)string_subscript,
1360 0,
Christian Heimes44720832008-05-26 13:01:01 +00001361};
1362
1363static PyBufferProcs string_as_buffer = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001364 (readbufferproc)string_buffer_getreadbuf,
1365 (writebufferproc)string_buffer_getwritebuf,
1366 (segcountproc)string_buffer_getsegcount,
1367 (charbufferproc)string_buffer_getcharbuf,
1368 (getbufferproc)string_buffer_getbuffer,
1369 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001370};
1371
1372
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001373
/* Strip modes; also used as indexes into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* The bare method name: the format string with its 3-char "|O:" prefix
   skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1382
Christian Heimes1a6387e2008-03-26 12:49:49 +00001383
/* True when the `length` bytes of `target` starting at `offset` equal
   the first `length` bytes of `pattern`.  The first and last bytes are
   compared directly and memcmp() covers the bytes from index 1 on.
   Don't call if length < 2 (the memcmp count is length-2).
   Every argument is parenthesized so that compound expressions expand
   correctly; arguments are evaluated more than once. */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[offset] == (pattern)[0] &&                                \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1389
1390
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* The two SPLIT_* macros below expect these names in the calling scope:
   `str` (scratch PyObject *), `list` (the result list) and an `onError`
   label; SPLIT_ADD additionally needs the running `count`.

   SPLIT_APPEND always appends data[left:right] to the list. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* SPLIT_ADD stores data[left:right] straight into slot `count` while
   count < MAX_PREALLOC and appends once that limit is passed. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Advance (or, for the R* forms, retreat) index `i` over whitespace /
   non-whitespace bytes of `s`.  `i` must be a modifiable lvalue. */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[i]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[i]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[i]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001440
1441Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001442split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001443{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001444 const char *s = PyString_AS_STRING(self);
1445 Py_ssize_t i, j, count=0;
1446 PyObject *str;
1447 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001448
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001449 if (list == NULL)
1450 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001451
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001452 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001453
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001454 while (maxsplit-- > 0) {
1455 SKIP_SPACE(s, i, len);
1456 if (i==len) break;
1457 j = i; i++;
1458 SKIP_NONSPACE(s, i, len);
1459 if (j == 0 && i == len && PyString_CheckExact(self)) {
1460 /* No whitespace in self, so just use it as list[0] */
1461 Py_INCREF(self);
1462 PyList_SET_ITEM(list, 0, (PyObject *)self);
1463 count++;
1464 break;
1465 }
1466 SPLIT_ADD(s, j, i);
1467 }
Christian Heimes44720832008-05-26 13:01:01 +00001468
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001469 if (i < len) {
1470 /* Only occurs when maxsplit was reached */
1471 /* Skip any remaining whitespace and copy to end of string */
1472 SKIP_SPACE(s, i, len);
1473 if (i != len)
1474 SPLIT_ADD(s, i, len);
1475 }
1476 FIX_PREALLOC_SIZE(list);
1477 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001478 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001479 Py_DECREF(list);
1480 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001481}
1482
Christian Heimes1a6387e2008-03-26 12:49:49 +00001483Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001484split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001485{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001486 const char *s = PyString_AS_STRING(self);
1487 register Py_ssize_t i, j, count=0;
1488 PyObject *str;
1489 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001490
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001491 if (list == NULL)
1492 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001493
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001494 i = j = 0;
1495 while ((j < len) && (maxcount-- > 0)) {
1496 for(; j<len; j++) {
1497 /* I found that using memchr makes no difference */
1498 if (s[j] == ch) {
1499 SPLIT_ADD(s, i, j);
1500 i = j = j + 1;
1501 break;
1502 }
1503 }
1504 }
1505 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1506 /* ch not in self, so just use self as list[0] */
1507 Py_INCREF(self);
1508 PyList_SET_ITEM(list, 0, (PyObject *)self);
1509 count++;
1510 }
1511 else if (i <= len) {
1512 SPLIT_ADD(s, i, len);
1513 }
1514 FIX_PREALLOC_SIZE(list);
1515 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001516
1517 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001518 Py_DECREF(list);
1519 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001520}
1521
1522PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001523"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001524\n\
Christian Heimes44720832008-05-26 13:01:01 +00001525Return a list of the words in the string S, using sep as the\n\
1526delimiter string. If maxsplit is given, at most maxsplit\n\
1527splits are done. If sep is not specified or is None, any\n\
1528whitespace string is a separator and empty strings are removed\n\
1529from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001530
1531static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001532string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001533{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001534 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1535 Py_ssize_t maxsplit = -1, count=0;
1536 const char *s = PyString_AS_STRING(self), *sub;
1537 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001538#ifdef USE_FAST
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001539 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001540#endif
1541
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001542 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1543 return NULL;
1544 if (maxsplit < 0)
1545 maxsplit = PY_SSIZE_T_MAX;
1546 if (subobj == Py_None)
1547 return split_whitespace(self, len, maxsplit);
1548 if (PyString_Check(subobj)) {
1549 sub = PyString_AS_STRING(subobj);
1550 n = PyString_GET_SIZE(subobj);
1551 }
Christian Heimes44720832008-05-26 13:01:01 +00001552#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001553 else if (PyUnicode_Check(subobj))
1554 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001555#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001556 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1557 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001558
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001559 if (n == 0) {
1560 PyErr_SetString(PyExc_ValueError, "empty separator");
1561 return NULL;
1562 }
1563 else if (n == 1)
1564 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001565
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001566 list = PyList_New(PREALLOC_SIZE(maxsplit));
1567 if (list == NULL)
1568 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001569
1570#ifdef USE_FAST
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001571 i = j = 0;
1572 while (maxsplit-- > 0) {
1573 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1574 if (pos < 0)
1575 break;
1576 j = i+pos;
1577 SPLIT_ADD(s, i, j);
1578 i = j + n;
1579 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001580#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001581 i = j = 0;
1582 while ((j+n <= len) && (maxsplit-- > 0)) {
1583 for (; j+n <= len; j++) {
1584 if (Py_STRING_MATCH(s, j, sub, n)) {
1585 SPLIT_ADD(s, i, j);
1586 i = j = j + n;
1587 break;
1588 }
1589 }
1590 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001591#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001592 SPLIT_ADD(s, i, len);
1593 FIX_PREALLOC_SIZE(list);
1594 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001595
Christian Heimes44720832008-05-26 13:01:01 +00001596 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001597 Py_DECREF(list);
1598 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001599}
1600
1601PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001602"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001603\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001604Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001605the separator itself, and the part after it. If the separator is not\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001606found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001607
1608static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001609string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001610{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001611 const char *sep;
1612 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001613
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001614 if (PyString_Check(sep_obj)) {
1615 sep = PyString_AS_STRING(sep_obj);
1616 sep_len = PyString_GET_SIZE(sep_obj);
1617 }
Christian Heimes44720832008-05-26 13:01:01 +00001618#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001619 else if (PyUnicode_Check(sep_obj))
1620 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001621#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001622 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1623 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001624
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001625 return stringlib_partition(
1626 (PyObject*) self,
1627 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1628 sep_obj, sep, sep_len
1629 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001630}
1631
1632PyDoc_STRVAR(rpartition__doc__,
Ezio Melottidabb5f72010-01-25 11:46:11 +00001633"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001634\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001635Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001636the part before it, the separator itself, and the part after it. If the\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001637separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001638
1639static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001640string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001641{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001642 const char *sep;
1643 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001644
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001645 if (PyString_Check(sep_obj)) {
1646 sep = PyString_AS_STRING(sep_obj);
1647 sep_len = PyString_GET_SIZE(sep_obj);
1648 }
Christian Heimes44720832008-05-26 13:01:01 +00001649#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001650 else if (PyUnicode_Check(sep_obj))
1651 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001652#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001653 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1654 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001655
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001656 return stringlib_rpartition(
1657 (PyObject*) self,
1658 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1659 sep_obj, sep, sep_len
1660 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001661}
1662
1663Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001664rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001665{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001666 const char *s = PyString_AS_STRING(self);
1667 Py_ssize_t i, j, count=0;
1668 PyObject *str;
1669 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001670
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001671 if (list == NULL)
1672 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001673
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001674 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001675
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001676 while (maxsplit-- > 0) {
1677 RSKIP_SPACE(s, i);
1678 if (i<0) break;
1679 j = i; i--;
1680 RSKIP_NONSPACE(s, i);
1681 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1682 /* No whitespace in self, so just use it as list[0] */
1683 Py_INCREF(self);
1684 PyList_SET_ITEM(list, 0, (PyObject *)self);
1685 count++;
1686 break;
1687 }
1688 SPLIT_ADD(s, i + 1, j + 1);
1689 }
1690 if (i >= 0) {
1691 /* Only occurs when maxsplit was reached */
1692 /* Skip any remaining whitespace and copy to beginning of string */
1693 RSKIP_SPACE(s, i);
1694 if (i >= 0)
1695 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001696
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001697 }
1698 FIX_PREALLOC_SIZE(list);
1699 if (PyList_Reverse(list) < 0)
1700 goto onError;
1701 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001702 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001703 Py_DECREF(list);
1704 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001705}
1706
1707Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001708rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001709{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001710 const char *s = PyString_AS_STRING(self);
1711 register Py_ssize_t i, j, count=0;
1712 PyObject *str;
1713 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001714
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001715 if (list == NULL)
1716 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001717
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001718 i = j = len - 1;
1719 while ((i >= 0) && (maxcount-- > 0)) {
1720 for (; i >= 0; i--) {
1721 if (s[i] == ch) {
1722 SPLIT_ADD(s, i + 1, j + 1);
1723 j = i = i - 1;
1724 break;
1725 }
1726 }
1727 }
1728 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1729 /* ch not in self, so just use self as list[0] */
1730 Py_INCREF(self);
1731 PyList_SET_ITEM(list, 0, (PyObject *)self);
1732 count++;
1733 }
1734 else if (j >= -1) {
1735 SPLIT_ADD(s, 0, j + 1);
1736 }
1737 FIX_PREALLOC_SIZE(list);
1738 if (PyList_Reverse(list) < 0)
1739 goto onError;
1740 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001741
Christian Heimes44720832008-05-26 13:01:01 +00001742 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001743 Py_DECREF(list);
1744 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001745}
1746
1747PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001748"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001749\n\
Christian Heimes44720832008-05-26 13:01:01 +00001750Return a list of the words in the string S, using sep as the\n\
1751delimiter string, starting at the end of the string and working\n\
1752to the front. If maxsplit is given, at most maxsplit splits are\n\
1753done. If sep is not specified or is None, any whitespace string\n\
1754is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001755
1756static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001757string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001758{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001759 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1760 Py_ssize_t maxsplit = -1, count=0;
1761 const char *s, *sub;
1762 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001763
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001764 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1765 return NULL;
1766 if (maxsplit < 0)
1767 maxsplit = PY_SSIZE_T_MAX;
1768 if (subobj == Py_None)
1769 return rsplit_whitespace(self, len, maxsplit);
1770 if (PyString_Check(subobj)) {
1771 sub = PyString_AS_STRING(subobj);
1772 n = PyString_GET_SIZE(subobj);
1773 }
Christian Heimes44720832008-05-26 13:01:01 +00001774#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001775 else if (PyUnicode_Check(subobj))
1776 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001777#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001778 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1779 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001780
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001781 if (n == 0) {
1782 PyErr_SetString(PyExc_ValueError, "empty separator");
1783 return NULL;
1784 }
1785 else if (n == 1)
1786 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001787
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001788 list = PyList_New(PREALLOC_SIZE(maxsplit));
1789 if (list == NULL)
1790 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001791
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001792 j = len;
1793 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001794
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001795 s = PyString_AS_STRING(self);
1796 while ( (i >= 0) && (maxsplit-- > 0) ) {
1797 for (; i>=0; i--) {
1798 if (Py_STRING_MATCH(s, i, sub, n)) {
1799 SPLIT_ADD(s, i + n, j);
1800 j = i;
1801 i -= n;
1802 break;
1803 }
1804 }
1805 }
1806 SPLIT_ADD(s, 0, j);
1807 FIX_PREALLOC_SIZE(list);
1808 if (PyList_Reverse(list) < 0)
1809 goto onError;
1810 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001811
1812onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001813 Py_DECREF(list);
1814 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001815}
1816
1817
1818PyDoc_STRVAR(join__doc__,
Georg Brandl5d2eb342009-10-27 15:08:27 +00001819"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001820\n\
1821Return a string which is the concatenation of the strings in the\n\
Georg Brandl5d2eb342009-10-27 15:08:27 +00001822iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001823
1824static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001825string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001826{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001827 char *sep = PyString_AS_STRING(self);
1828 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1829 PyObject *res = NULL;
1830 char *p;
1831 Py_ssize_t seqlen = 0;
1832 size_t sz = 0;
1833 Py_ssize_t i;
1834 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001835
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001836 seq = PySequence_Fast(orig, "");
1837 if (seq == NULL) {
1838 return NULL;
1839 }
Christian Heimes44720832008-05-26 13:01:01 +00001840
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001841 seqlen = PySequence_Size(seq);
1842 if (seqlen == 0) {
1843 Py_DECREF(seq);
1844 return PyString_FromString("");
1845 }
1846 if (seqlen == 1) {
1847 item = PySequence_Fast_GET_ITEM(seq, 0);
1848 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1849 Py_INCREF(item);
1850 Py_DECREF(seq);
1851 return item;
1852 }
1853 }
Christian Heimes44720832008-05-26 13:01:01 +00001854
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001855 /* There are at least two things to join, or else we have a subclass
1856 * of the builtin types in the sequence.
1857 * Do a pre-pass to figure out the total amount of space we'll
1858 * need (sz), see whether any argument is absurd, and defer to
1859 * the Unicode join if appropriate.
1860 */
1861 for (i = 0; i < seqlen; i++) {
1862 const size_t old_sz = sz;
1863 item = PySequence_Fast_GET_ITEM(seq, i);
1864 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001865#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001866 if (PyUnicode_Check(item)) {
1867 /* Defer to Unicode join.
1868 * CAUTION: There's no gurantee that the
1869 * original sequence can be iterated over
1870 * again, so we must pass seq here.
1871 */
1872 PyObject *result;
1873 result = PyUnicode_Join((PyObject *)self, seq);
1874 Py_DECREF(seq);
1875 return result;
1876 }
Christian Heimes44720832008-05-26 13:01:01 +00001877#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001878 PyErr_Format(PyExc_TypeError,
1879 "sequence item %zd: expected string,"
1880 " %.80s found",
1881 i, Py_TYPE(item)->tp_name);
1882 Py_DECREF(seq);
1883 return NULL;
1884 }
1885 sz += PyString_GET_SIZE(item);
1886 if (i != 0)
1887 sz += seplen;
1888 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1889 PyErr_SetString(PyExc_OverflowError,
1890 "join() result is too long for a Python string");
1891 Py_DECREF(seq);
1892 return NULL;
1893 }
1894 }
Christian Heimes44720832008-05-26 13:01:01 +00001895
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001896 /* Allocate result space. */
1897 res = PyString_FromStringAndSize((char*)NULL, sz);
1898 if (res == NULL) {
1899 Py_DECREF(seq);
1900 return NULL;
1901 }
Christian Heimes44720832008-05-26 13:01:01 +00001902
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001903 /* Catenate everything. */
1904 p = PyString_AS_STRING(res);
1905 for (i = 0; i < seqlen; ++i) {
1906 size_t n;
1907 item = PySequence_Fast_GET_ITEM(seq, i);
1908 n = PyString_GET_SIZE(item);
1909 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1910 p += n;
1911 if (i < seqlen - 1) {
1912 Py_MEMCPY(p, sep, seplen);
1913 p += seplen;
1914 }
1915 }
Christian Heimes44720832008-05-26 13:01:01 +00001916
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001917 Py_DECREF(seq);
1918 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001919}
1920
1921PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001922_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001923{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001924 assert(sep != NULL && PyString_Check(sep));
1925 assert(x != NULL);
1926 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001927}
1928
1929Py_LOCAL_INLINE(void)
1930string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1931{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001932 if (*end > len)
1933 *end = len;
1934 else if (*end < 0)
1935 *end += len;
1936 if (*end < 0)
1937 *end = 0;
1938 if (*start < 0)
1939 *start += len;
1940 if (*start < 0)
1941 *start = 0;
Christian Heimes44720832008-05-26 13:01:01 +00001942}
1943
1944Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001945string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001946{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001947 PyObject *subobj;
1948 const char *sub;
1949 Py_ssize_t sub_len;
1950 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1951 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes44720832008-05-26 13:01:01 +00001952
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001953 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1954 &obj_start, &obj_end))
1955 return -2;
1956 /* To support None in "start" and "end" arguments, meaning
1957 the same as if they were not passed.
1958 */
1959 if (obj_start != Py_None)
1960 if (!_PyEval_SliceIndex(obj_start, &start))
1961 return -2;
1962 if (obj_end != Py_None)
1963 if (!_PyEval_SliceIndex(obj_end, &end))
1964 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001965
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001966 if (PyString_Check(subobj)) {
1967 sub = PyString_AS_STRING(subobj);
1968 sub_len = PyString_GET_SIZE(subobj);
1969 }
Christian Heimes44720832008-05-26 13:01:01 +00001970#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001971 else if (PyUnicode_Check(subobj))
1972 return PyUnicode_Find(
1973 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001974#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001975 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1976 /* XXX - the "expected a character buffer object" is pretty
1977 confusing for a non-expert. remap to something else ? */
1978 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001979
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001980 if (dir > 0)
1981 return stringlib_find_slice(
1982 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1983 sub, sub_len, start, end);
1984 else
1985 return stringlib_rfind_slice(
1986 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1987 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001988}
1989
1990
1991PyDoc_STRVAR(find__doc__,
1992"S.find(sub [,start [,end]]) -> int\n\
1993\n\
1994Return the lowest index in S where substring sub is found,\n\
1995such that sub is contained within s[start:end]. Optional\n\
1996arguments start and end are interpreted as in slice notation.\n\
1997\n\
1998Return -1 on failure.");
1999
2000static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002001string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002002{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002003 Py_ssize_t result = string_find_internal(self, args, +1);
2004 if (result == -2)
2005 return NULL;
2006 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002007}
2008
2009
2010PyDoc_STRVAR(index__doc__,
2011"S.index(sub [,start [,end]]) -> int\n\
2012\n\
2013Like S.find() but raise ValueError when the substring is not found.");
2014
2015static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002016string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002017{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002018 Py_ssize_t result = string_find_internal(self, args, +1);
2019 if (result == -2)
2020 return NULL;
2021 if (result == -1) {
2022 PyErr_SetString(PyExc_ValueError,
2023 "substring not found");
2024 return NULL;
2025 }
2026 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002027}
2028
2029
2030PyDoc_STRVAR(rfind__doc__,
2031"S.rfind(sub [,start [,end]]) -> int\n\
2032\n\
2033Return the highest index in S where substring sub is found,\n\
2034such that sub is contained within s[start:end]. Optional\n\
2035arguments start and end are interpreted as in slice notation.\n\
2036\n\
2037Return -1 on failure.");
2038
2039static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002040string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002041{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002042 Py_ssize_t result = string_find_internal(self, args, -1);
2043 if (result == -2)
2044 return NULL;
2045 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002046}
2047
2048
2049PyDoc_STRVAR(rindex__doc__,
2050"S.rindex(sub [,start [,end]]) -> int\n\
2051\n\
2052Like S.rfind() but raise ValueError when the substring is not found.");
2053
2054static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002055string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002056{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002057 Py_ssize_t result = string_find_internal(self, args, -1);
2058 if (result == -2)
2059 return NULL;
2060 if (result == -1) {
2061 PyErr_SetString(PyExc_ValueError,
2062 "substring not found");
2063 return NULL;
2064 }
2065 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002066}
2067
2068
2069Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002070do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002071{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002072 char *s = PyString_AS_STRING(self);
2073 Py_ssize_t len = PyString_GET_SIZE(self);
2074 char *sep = PyString_AS_STRING(sepobj);
2075 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2076 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002077
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002078 i = 0;
2079 if (striptype != RIGHTSTRIP) {
2080 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2081 i++;
2082 }
2083 }
Christian Heimes44720832008-05-26 13:01:01 +00002084
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002085 j = len;
2086 if (striptype != LEFTSTRIP) {
2087 do {
2088 j--;
2089 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2090 j++;
2091 }
Christian Heimes44720832008-05-26 13:01:01 +00002092
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002093 if (i == 0 && j == len && PyString_CheckExact(self)) {
2094 Py_INCREF(self);
2095 return (PyObject*)self;
2096 }
2097 else
2098 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002099}
2100
2101
2102Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002103do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002104{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002105 char *s = PyString_AS_STRING(self);
2106 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002107
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002108 i = 0;
2109 if (striptype != RIGHTSTRIP) {
2110 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2111 i++;
2112 }
2113 }
Christian Heimes44720832008-05-26 13:01:01 +00002114
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002115 j = len;
2116 if (striptype != LEFTSTRIP) {
2117 do {
2118 j--;
2119 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2120 j++;
2121 }
Christian Heimes44720832008-05-26 13:01:01 +00002122
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002123 if (i == 0 && j == len && PyString_CheckExact(self)) {
2124 Py_INCREF(self);
2125 return (PyObject*)self;
2126 }
2127 else
2128 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002129}
2130
2131
2132Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002133do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002134{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002135 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002136
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002137 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2138 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002139
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002140 if (sep != NULL && sep != Py_None) {
2141 if (PyString_Check(sep))
2142 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00002143#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002144 else if (PyUnicode_Check(sep)) {
2145 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2146 PyObject *res;
2147 if (uniself==NULL)
2148 return NULL;
2149 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2150 striptype, sep);
2151 Py_DECREF(uniself);
2152 return res;
2153 }
Christian Heimes44720832008-05-26 13:01:01 +00002154#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002155 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00002156#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002157 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00002158#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002159 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00002160#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002161 STRIPNAME(striptype));
2162 return NULL;
2163 }
Christian Heimes44720832008-05-26 13:01:01 +00002164
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002165 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00002166}
2167
2168
2169PyDoc_STRVAR(strip__doc__,
2170"S.strip([chars]) -> string or unicode\n\
2171\n\
2172Return a copy of the string S with leading and trailing\n\
2173whitespace removed.\n\
2174If chars is given and not None, remove characters in chars instead.\n\
2175If chars is unicode, S will be converted to unicode before stripping");
2176
2177static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002178string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002179{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002180 if (PyTuple_GET_SIZE(args) == 0)
2181 return do_strip(self, BOTHSTRIP); /* Common case */
2182 else
2183 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002184}
2185
2186
2187PyDoc_STRVAR(lstrip__doc__,
2188"S.lstrip([chars]) -> string or unicode\n\
2189\n\
2190Return a copy of the string S with leading whitespace removed.\n\
2191If chars is given and not None, remove characters in chars instead.\n\
2192If chars is unicode, S will be converted to unicode before stripping");
2193
2194static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002195string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002196{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002197 if (PyTuple_GET_SIZE(args) == 0)
2198 return do_strip(self, LEFTSTRIP); /* Common case */
2199 else
2200 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002201}
2202
2203
2204PyDoc_STRVAR(rstrip__doc__,
2205"S.rstrip([chars]) -> string or unicode\n\
2206\n\
2207Return a copy of the string S with trailing whitespace removed.\n\
2208If chars is given and not None, remove characters in chars instead.\n\
2209If chars is unicode, S will be converted to unicode before stripping");
2210
2211static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002212string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002213{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002214 if (PyTuple_GET_SIZE(args) == 0)
2215 return do_strip(self, RIGHTSTRIP); /* Common case */
2216 else
2217 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002218}
2219
2220
2221PyDoc_STRVAR(lower__doc__,
2222"S.lower() -> string\n\
2223\n\
2224Return a copy of the string S converted to lowercase.");
2225
2226/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2227#ifndef _tolower
2228#define _tolower tolower
2229#endif
2230
2231static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002232string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002233{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002234 char *s;
2235 Py_ssize_t i, n = PyString_GET_SIZE(self);
2236 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002237
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002238 newobj = PyString_FromStringAndSize(NULL, n);
2239 if (!newobj)
2240 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002241
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002242 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002243
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002244 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002245
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002246 for (i = 0; i < n; i++) {
2247 int c = Py_CHARMASK(s[i]);
2248 if (isupper(c))
2249 s[i] = _tolower(c);
2250 }
Christian Heimes44720832008-05-26 13:01:01 +00002251
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002252 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002253}
2254
2255PyDoc_STRVAR(upper__doc__,
2256"S.upper() -> string\n\
2257\n\
2258Return a copy of the string S converted to uppercase.");
2259
2260#ifndef _toupper
2261#define _toupper toupper
2262#endif
2263
2264static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002265string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002266{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002267 char *s;
2268 Py_ssize_t i, n = PyString_GET_SIZE(self);
2269 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002270
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002271 newobj = PyString_FromStringAndSize(NULL, n);
2272 if (!newobj)
2273 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002274
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002275 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002276
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002277 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002278
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002279 for (i = 0; i < n; i++) {
2280 int c = Py_CHARMASK(s[i]);
2281 if (islower(c))
2282 s[i] = _toupper(c);
2283 }
Christian Heimes44720832008-05-26 13:01:01 +00002284
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002285 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002286}
2287
2288PyDoc_STRVAR(title__doc__,
2289"S.title() -> string\n\
2290\n\
2291Return a titlecased version of S, i.e. words start with uppercase\n\
2292characters, all remaining cased characters have lowercase.");
2293
2294static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002295string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002296{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002297 char *s = PyString_AS_STRING(self), *s_new;
2298 Py_ssize_t i, n = PyString_GET_SIZE(self);
2299 int previous_is_cased = 0;
2300 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002301
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002302 newobj = PyString_FromStringAndSize(NULL, n);
2303 if (newobj == NULL)
2304 return NULL;
2305 s_new = PyString_AsString(newobj);
2306 for (i = 0; i < n; i++) {
2307 int c = Py_CHARMASK(*s++);
2308 if (islower(c)) {
2309 if (!previous_is_cased)
2310 c = toupper(c);
2311 previous_is_cased = 1;
2312 } else if (isupper(c)) {
2313 if (previous_is_cased)
2314 c = tolower(c);
2315 previous_is_cased = 1;
2316 } else
2317 previous_is_cased = 0;
2318 *s_new++ = c;
2319 }
2320 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002321}
2322
2323PyDoc_STRVAR(capitalize__doc__,
2324"S.capitalize() -> string\n\
2325\n\
2326Return a copy of the string S with only its first character\n\
2327capitalized.");
2328
2329static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002330string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002331{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002332 char *s = PyString_AS_STRING(self), *s_new;
2333 Py_ssize_t i, n = PyString_GET_SIZE(self);
2334 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002335
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002336 newobj = PyString_FromStringAndSize(NULL, n);
2337 if (newobj == NULL)
2338 return NULL;
2339 s_new = PyString_AsString(newobj);
2340 if (0 < n) {
2341 int c = Py_CHARMASK(*s++);
2342 if (islower(c))
2343 *s_new = toupper(c);
2344 else
2345 *s_new = c;
2346 s_new++;
2347 }
2348 for (i = 1; i < n; i++) {
2349 int c = Py_CHARMASK(*s++);
2350 if (isupper(c))
2351 *s_new = tolower(c);
2352 else
2353 *s_new = c;
2354 s_new++;
2355 }
2356 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002357}
2358
2359
2360PyDoc_STRVAR(count__doc__,
2361"S.count(sub[, start[, end]]) -> int\n\
2362\n\
2363Return the number of non-overlapping occurrences of substring sub in\n\
2364string S[start:end]. Optional arguments start and end are interpreted\n\
2365as in slice notation.");
2366
2367static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002368string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002369{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002370 PyObject *sub_obj;
2371 const char *str = PyString_AS_STRING(self), *sub;
2372 Py_ssize_t sub_len;
2373 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002374
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002375 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2376 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2377 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002378
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002379 if (PyString_Check(sub_obj)) {
2380 sub = PyString_AS_STRING(sub_obj);
2381 sub_len = PyString_GET_SIZE(sub_obj);
2382 }
Christian Heimes44720832008-05-26 13:01:01 +00002383#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002384 else if (PyUnicode_Check(sub_obj)) {
2385 Py_ssize_t count;
2386 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2387 if (count == -1)
2388 return NULL;
2389 else
2390 return PyInt_FromSsize_t(count);
2391 }
Christian Heimes44720832008-05-26 13:01:01 +00002392#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002393 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2394 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002395
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002396 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002397
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002398 return PyInt_FromSsize_t(
2399 stringlib_count(str + start, end - start, sub, sub_len)
2400 );
Christian Heimes44720832008-05-26 13:01:01 +00002401}
2402
2403PyDoc_STRVAR(swapcase__doc__,
2404"S.swapcase() -> string\n\
2405\n\
2406Return a copy of the string S with uppercase characters\n\
2407converted to lowercase and vice versa.");
2408
2409static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002410string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002411{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002412 char *s = PyString_AS_STRING(self), *s_new;
2413 Py_ssize_t i, n = PyString_GET_SIZE(self);
2414 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002415
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002416 newobj = PyString_FromStringAndSize(NULL, n);
2417 if (newobj == NULL)
2418 return NULL;
2419 s_new = PyString_AsString(newobj);
2420 for (i = 0; i < n; i++) {
2421 int c = Py_CHARMASK(*s++);
2422 if (islower(c)) {
2423 *s_new = toupper(c);
2424 }
2425 else if (isupper(c)) {
2426 *s_new = tolower(c);
2427 }
2428 else
2429 *s_new = c;
2430 s_new++;
2431 }
2432 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002433}
2434
2435
2436PyDoc_STRVAR(translate__doc__,
2437"S.translate(table [,deletechars]) -> string\n\
2438\n\
2439Return a copy of the string S, where all characters occurring\n\
2440in the optional argument deletechars are removed, and the\n\
2441remaining characters have been mapped through the given\n\
2442translation table, which must be a string of length 256.");
2443
2444static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002445string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002446{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002447 register char *input, *output;
2448 const char *table;
2449 register Py_ssize_t i, c, changed = 0;
2450 PyObject *input_obj = (PyObject*)self;
2451 const char *output_start, *del_table=NULL;
2452 Py_ssize_t inlen, tablen, dellen = 0;
2453 PyObject *result;
2454 int trans_table[256];
2455 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002456
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002457 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2458 &tableobj, &delobj))
2459 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002460
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002461 if (PyString_Check(tableobj)) {
2462 table = PyString_AS_STRING(tableobj);
2463 tablen = PyString_GET_SIZE(tableobj);
2464 }
2465 else if (tableobj == Py_None) {
2466 table = NULL;
2467 tablen = 256;
2468 }
Christian Heimes44720832008-05-26 13:01:01 +00002469#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002470 else if (PyUnicode_Check(tableobj)) {
2471 /* Unicode .translate() does not support the deletechars
2472 parameter; instead a mapping to None will cause characters
2473 to be deleted. */
2474 if (delobj != NULL) {
2475 PyErr_SetString(PyExc_TypeError,
2476 "deletions are implemented differently for unicode");
2477 return NULL;
2478 }
2479 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2480 }
Christian Heimes44720832008-05-26 13:01:01 +00002481#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002482 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2483 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002484
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002485 if (tablen != 256) {
2486 PyErr_SetString(PyExc_ValueError,
2487 "translation table must be 256 characters long");
2488 return NULL;
2489 }
Christian Heimes44720832008-05-26 13:01:01 +00002490
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002491 if (delobj != NULL) {
2492 if (PyString_Check(delobj)) {
2493 del_table = PyString_AS_STRING(delobj);
2494 dellen = PyString_GET_SIZE(delobj);
2495 }
Christian Heimes44720832008-05-26 13:01:01 +00002496#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002497 else if (PyUnicode_Check(delobj)) {
2498 PyErr_SetString(PyExc_TypeError,
2499 "deletions are implemented differently for unicode");
2500 return NULL;
2501 }
Christian Heimes44720832008-05-26 13:01:01 +00002502#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002503 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2504 return NULL;
2505 }
2506 else {
2507 del_table = NULL;
2508 dellen = 0;
2509 }
Christian Heimes44720832008-05-26 13:01:01 +00002510
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002511 inlen = PyString_GET_SIZE(input_obj);
2512 result = PyString_FromStringAndSize((char *)NULL, inlen);
2513 if (result == NULL)
2514 return NULL;
2515 output_start = output = PyString_AsString(result);
2516 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002517
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002518 if (dellen == 0 && table != NULL) {
2519 /* If no deletions are required, use faster code */
2520 for (i = inlen; --i >= 0; ) {
2521 c = Py_CHARMASK(*input++);
2522 if (Py_CHARMASK((*output++ = table[c])) != c)
2523 changed = 1;
2524 }
2525 if (changed || !PyString_CheckExact(input_obj))
2526 return result;
2527 Py_DECREF(result);
2528 Py_INCREF(input_obj);
2529 return input_obj;
2530 }
Christian Heimes44720832008-05-26 13:01:01 +00002531
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002532 if (table == NULL) {
2533 for (i = 0; i < 256; i++)
2534 trans_table[i] = Py_CHARMASK(i);
2535 } else {
2536 for (i = 0; i < 256; i++)
2537 trans_table[i] = Py_CHARMASK(table[i]);
2538 }
Christian Heimes44720832008-05-26 13:01:01 +00002539
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002540 for (i = 0; i < dellen; i++)
2541 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002542
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002543 for (i = inlen; --i >= 0; ) {
2544 c = Py_CHARMASK(*input++);
2545 if (trans_table[c] != -1)
2546 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2547 continue;
2548 changed = 1;
2549 }
2550 if (!changed && PyString_CheckExact(input_obj)) {
2551 Py_DECREF(result);
2552 Py_INCREF(input_obj);
2553 return input_obj;
2554 }
2555 /* Fix the size of the resulting string */
2556 if (inlen > 0)
2557 _PyString_Resize(&result, output - output_start);
2558 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002559}
2560
2561
/* Values for the `direction' argument of findstring()/countstring():
   a positive value scans forward, a negative value scans backward. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c within the first target_len bytes of
   target; evaluates to a char * at the match, or NULL when absent. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2569
2570/* String ops must return a string. */
2571/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002572Py_LOCAL(PyStringObject *)
2573return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002574{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002575 if (PyString_CheckExact(self)) {
2576 Py_INCREF(self);
2577 return self;
2578 }
2579 return (PyStringObject *)PyString_FromStringAndSize(
2580 PyString_AS_STRING(self),
2581 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002582}
2583
2584Py_LOCAL_INLINE(Py_ssize_t)
2585countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2586{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002587 Py_ssize_t count=0;
2588 const char *start=target;
2589 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002590
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002591 while ( (start=findchar(start, end-start, c)) != NULL ) {
2592 count++;
2593 if (count >= maxcount)
2594 break;
2595 start += 1;
2596 }
2597 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002598}
2599
2600Py_LOCAL(Py_ssize_t)
2601findstring(const char *target, Py_ssize_t target_len,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002602 const char *pattern, Py_ssize_t pattern_len,
2603 Py_ssize_t start,
2604 Py_ssize_t end,
2605 int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002606{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002607 if (start < 0) {
2608 start += target_len;
2609 if (start < 0)
2610 start = 0;
2611 }
2612 if (end > target_len) {
2613 end = target_len;
2614 } else if (end < 0) {
2615 end += target_len;
2616 if (end < 0)
2617 end = 0;
2618 }
Christian Heimes44720832008-05-26 13:01:01 +00002619
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002620 /* zero-length substrings always match at the first attempt */
2621 if (pattern_len == 0)
2622 return (direction > 0) ? start : end;
Christian Heimes44720832008-05-26 13:01:01 +00002623
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002624 end -= pattern_len;
Christian Heimes44720832008-05-26 13:01:01 +00002625
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002626 if (direction < 0) {
2627 for (; end >= start; end--)
2628 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2629 return end;
2630 } else {
2631 for (; start <= end; start++)
2632 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2633 return start;
2634 }
2635 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00002636}
2637
2638Py_LOCAL_INLINE(Py_ssize_t)
2639countstring(const char *target, Py_ssize_t target_len,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002640 const char *pattern, Py_ssize_t pattern_len,
2641 Py_ssize_t start,
2642 Py_ssize_t end,
2643 int direction, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002644{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002645 Py_ssize_t count=0;
Christian Heimes44720832008-05-26 13:01:01 +00002646
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002647 if (start < 0) {
2648 start += target_len;
2649 if (start < 0)
2650 start = 0;
2651 }
2652 if (end > target_len) {
2653 end = target_len;
2654 } else if (end < 0) {
2655 end += target_len;
2656 if (end < 0)
2657 end = 0;
2658 }
Christian Heimes44720832008-05-26 13:01:01 +00002659
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002660 /* zero-length substrings match everywhere */
2661 if (pattern_len == 0 || maxcount == 0) {
2662 if (target_len+1 < maxcount)
2663 return target_len+1;
2664 return maxcount;
2665 }
Christian Heimes44720832008-05-26 13:01:01 +00002666
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002667 end -= pattern_len;
2668 if (direction < 0) {
2669 for (; (end >= start); end--)
2670 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2671 count++;
2672 if (--maxcount <= 0) break;
2673 end -= pattern_len-1;
2674 }
2675 } else {
2676 for (; (start <= end); start++)
2677 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2678 count++;
2679 if (--maxcount <= 0)
2680 break;
2681 start += pattern_len-1;
2682 }
2683 }
2684 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002685}
2686
2687
2688/* Algorithms for different cases of string replacement */
2689
2690/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002691Py_LOCAL(PyStringObject *)
2692replace_interleave(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002693 const char *to_s, Py_ssize_t to_len,
2694 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002695{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002696 char *self_s, *result_s;
2697 Py_ssize_t self_len, result_len;
2698 Py_ssize_t count, i, product;
2699 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002700
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002701 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002702
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002703 /* 1 at the end plus 1 after every character */
2704 count = self_len+1;
2705 if (maxcount < count)
2706 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002707
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002708 /* Check for overflow */
2709 /* result_len = count * to_len + self_len; */
2710 product = count * to_len;
2711 if (product / to_len != count) {
2712 PyErr_SetString(PyExc_OverflowError,
2713 "replace string is too long");
2714 return NULL;
2715 }
2716 result_len = product + self_len;
2717 if (result_len < 0) {
2718 PyErr_SetString(PyExc_OverflowError,
2719 "replace string is too long");
2720 return NULL;
2721 }
Christian Heimes44720832008-05-26 13:01:01 +00002722
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002723 if (! (result = (PyStringObject *)
2724 PyString_FromStringAndSize(NULL, result_len)) )
2725 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002726
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002727 self_s = PyString_AS_STRING(self);
2728 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002729
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002730 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002731
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002732 /* Lay the first one down (guaranteed this will occur) */
2733 Py_MEMCPY(result_s, to_s, to_len);
2734 result_s += to_len;
2735 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002736
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002737 for (i=0; i<count; i++) {
2738 *result_s++ = *self_s++;
2739 Py_MEMCPY(result_s, to_s, to_len);
2740 result_s += to_len;
2741 }
2742
2743 /* Copy the rest of the original string */
2744 Py_MEMCPY(result_s, self_s, self_len-i);
2745
2746 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002747}
2748
2749/* Special case for deleting a single character */
2750/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002751Py_LOCAL(PyStringObject *)
2752replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002753 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002754{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002755 char *self_s, *result_s;
2756 char *start, *next, *end;
2757 Py_ssize_t self_len, result_len;
2758 Py_ssize_t count;
2759 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002760
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002761 self_len = PyString_GET_SIZE(self);
2762 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002763
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002764 count = countchar(self_s, self_len, from_c, maxcount);
2765 if (count == 0) {
2766 return return_self(self);
2767 }
Christian Heimes44720832008-05-26 13:01:01 +00002768
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002769 result_len = self_len - count; /* from_len == 1 */
2770 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002771
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002772 if ( (result = (PyStringObject *)
2773 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2774 return NULL;
2775 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002776
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002777 start = self_s;
2778 end = self_s + self_len;
2779 while (count-- > 0) {
2780 next = findchar(start, end-start, from_c);
2781 if (next == NULL)
2782 break;
2783 Py_MEMCPY(result_s, start, next-start);
2784 result_s += (next-start);
2785 start = next+1;
2786 }
2787 Py_MEMCPY(result_s, start, end-start);
2788
2789 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002790}
2791
2792/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2793
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002794Py_LOCAL(PyStringObject *)
2795replace_delete_substring(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002796 const char *from_s, Py_ssize_t from_len,
2797 Py_ssize_t maxcount) {
2798 char *self_s, *result_s;
2799 char *start, *next, *end;
2800 Py_ssize_t self_len, result_len;
2801 Py_ssize_t count, offset;
2802 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002803
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002804 self_len = PyString_GET_SIZE(self);
2805 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002806
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002807 count = countstring(self_s, self_len,
2808 from_s, from_len,
2809 0, self_len, 1,
2810 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002811
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002812 if (count == 0) {
2813 /* no matches */
2814 return return_self(self);
2815 }
Christian Heimes44720832008-05-26 13:01:01 +00002816
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002817 result_len = self_len - (count * from_len);
2818 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002819
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002820 if ( (result = (PyStringObject *)
2821 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2822 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002823
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002824 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002825
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002826 start = self_s;
2827 end = self_s + self_len;
2828 while (count-- > 0) {
2829 offset = findstring(start, end-start,
2830 from_s, from_len,
2831 0, end-start, FORWARD);
2832 if (offset == -1)
2833 break;
2834 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002835
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002836 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002837
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002838 result_s += (next-start);
2839 start = next+from_len;
2840 }
2841 Py_MEMCPY(result_s, start, end-start);
2842 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002843}
2844
2845/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002846Py_LOCAL(PyStringObject *)
2847replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002848 char from_c, char to_c,
2849 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002850{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002851 char *self_s, *result_s, *start, *end, *next;
2852 Py_ssize_t self_len;
2853 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002854
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002855 /* The result string will be the same size */
2856 self_s = PyString_AS_STRING(self);
2857 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002858
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002859 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002860
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002861 if (next == NULL) {
2862 /* No matches; return the original string */
2863 return return_self(self);
2864 }
Christian Heimes44720832008-05-26 13:01:01 +00002865
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002866 /* Need to make a new string */
2867 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2868 if (result == NULL)
2869 return NULL;
2870 result_s = PyString_AS_STRING(result);
2871 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002872
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002873 /* change everything in-place, starting with this one */
2874 start = result_s + (next-self_s);
2875 *start = to_c;
2876 start++;
2877 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002878
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002879 while (--maxcount > 0) {
2880 next = findchar(start, end-start, from_c);
2881 if (next == NULL)
2882 break;
2883 *next = to_c;
2884 start = next+1;
2885 }
Christian Heimes44720832008-05-26 13:01:01 +00002886
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002887 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002888}
2889
2890/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002891Py_LOCAL(PyStringObject *)
2892replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002893 const char *from_s, Py_ssize_t from_len,
2894 const char *to_s, Py_ssize_t to_len,
2895 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002896{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002897 char *result_s, *start, *end;
2898 char *self_s;
2899 Py_ssize_t self_len, offset;
2900 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002901
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002902 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002903
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002904 self_s = PyString_AS_STRING(self);
2905 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002906
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002907 offset = findstring(self_s, self_len,
2908 from_s, from_len,
2909 0, self_len, FORWARD);
2910 if (offset == -1) {
2911 /* No matches; return the original string */
2912 return return_self(self);
2913 }
Christian Heimes44720832008-05-26 13:01:01 +00002914
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002915 /* Need to make a new string */
2916 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2917 if (result == NULL)
2918 return NULL;
2919 result_s = PyString_AS_STRING(result);
2920 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002921
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002922 /* change everything in-place, starting with this one */
2923 start = result_s + offset;
2924 Py_MEMCPY(start, to_s, from_len);
2925 start += from_len;
2926 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002927
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002928 while ( --maxcount > 0) {
2929 offset = findstring(start, end-start,
2930 from_s, from_len,
2931 0, end-start, FORWARD);
2932 if (offset==-1)
2933 break;
2934 Py_MEMCPY(start+offset, to_s, from_len);
2935 start += offset+from_len;
2936 }
Christian Heimes44720832008-05-26 13:01:01 +00002937
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002938 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002939}
2940
2941/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002942Py_LOCAL(PyStringObject *)
2943replace_single_character(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002944 char from_c,
2945 const char *to_s, Py_ssize_t to_len,
2946 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002947{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002948 char *self_s, *result_s;
2949 char *start, *next, *end;
2950 Py_ssize_t self_len, result_len;
2951 Py_ssize_t count, product;
2952 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002953
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002954 self_s = PyString_AS_STRING(self);
2955 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002956
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002957 count = countchar(self_s, self_len, from_c, maxcount);
2958 if (count == 0) {
2959 /* no matches, return unchanged */
2960 return return_self(self);
2961 }
Christian Heimes44720832008-05-26 13:01:01 +00002962
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002963 /* use the difference between current and new, hence the "-1" */
2964 /* result_len = self_len + count * (to_len-1) */
2965 product = count * (to_len-1);
2966 if (product / (to_len-1) != count) {
2967 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2968 return NULL;
2969 }
2970 result_len = self_len + product;
2971 if (result_len < 0) {
2972 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2973 return NULL;
2974 }
Christian Heimes44720832008-05-26 13:01:01 +00002975
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002976 if ( (result = (PyStringObject *)
2977 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2978 return NULL;
2979 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002980
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002981 start = self_s;
2982 end = self_s + self_len;
2983 while (count-- > 0) {
2984 next = findchar(start, end-start, from_c);
2985 if (next == NULL)
2986 break;
Christian Heimes44720832008-05-26 13:01:01 +00002987
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002988 if (next == start) {
2989 /* replace with the 'to' */
2990 Py_MEMCPY(result_s, to_s, to_len);
2991 result_s += to_len;
2992 start += 1;
2993 } else {
2994 /* copy the unchanged old then the 'to' */
2995 Py_MEMCPY(result_s, start, next-start);
2996 result_s += (next-start);
2997 Py_MEMCPY(result_s, to_s, to_len);
2998 result_s += to_len;
2999 start = next+1;
3000 }
3001 }
3002 /* Copy the remainder of the remaining string */
3003 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00003004
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003005 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003006}
3007
3008/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003009Py_LOCAL(PyStringObject *)
3010replace_substring(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003011 const char *from_s, Py_ssize_t from_len,
3012 const char *to_s, Py_ssize_t to_len,
3013 Py_ssize_t maxcount) {
3014 char *self_s, *result_s;
3015 char *start, *next, *end;
3016 Py_ssize_t self_len, result_len;
3017 Py_ssize_t count, offset, product;
3018 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003019
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003020 self_s = PyString_AS_STRING(self);
3021 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003022
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003023 count = countstring(self_s, self_len,
3024 from_s, from_len,
3025 0, self_len, FORWARD, maxcount);
3026 if (count == 0) {
3027 /* no matches, return unchanged */
3028 return return_self(self);
3029 }
Christian Heimes44720832008-05-26 13:01:01 +00003030
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003031 /* Check for overflow */
3032 /* result_len = self_len + count * (to_len-from_len) */
3033 product = count * (to_len-from_len);
3034 if (product / (to_len-from_len) != count) {
3035 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3036 return NULL;
3037 }
3038 result_len = self_len + product;
3039 if (result_len < 0) {
3040 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3041 return NULL;
3042 }
Christian Heimes44720832008-05-26 13:01:01 +00003043
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003044 if ( (result = (PyStringObject *)
3045 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3046 return NULL;
3047 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003048
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003049 start = self_s;
3050 end = self_s + self_len;
3051 while (count-- > 0) {
3052 offset = findstring(start, end-start,
3053 from_s, from_len,
3054 0, end-start, FORWARD);
3055 if (offset == -1)
3056 break;
3057 next = start+offset;
3058 if (next == start) {
3059 /* replace with the 'to' */
3060 Py_MEMCPY(result_s, to_s, to_len);
3061 result_s += to_len;
3062 start += from_len;
3063 } else {
3064 /* copy the unchanged old then the 'to' */
3065 Py_MEMCPY(result_s, start, next-start);
3066 result_s += (next-start);
3067 Py_MEMCPY(result_s, to_s, to_len);
3068 result_s += to_len;
3069 start = next+from_len;
3070 }
3071 }
3072 /* Copy the remainder of the remaining string */
3073 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00003074
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003075 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003076}
3077
3078
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003079Py_LOCAL(PyStringObject *)
3080replace(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003081 const char *from_s, Py_ssize_t from_len,
3082 const char *to_s, Py_ssize_t to_len,
3083 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00003084{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003085 if (maxcount < 0) {
3086 maxcount = PY_SSIZE_T_MAX;
3087 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3088 /* nothing to do; return the original string */
3089 return return_self(self);
3090 }
Christian Heimes44720832008-05-26 13:01:01 +00003091
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003092 if (maxcount == 0 ||
3093 (from_len == 0 && to_len == 0)) {
3094 /* nothing to do; return the original string */
3095 return return_self(self);
3096 }
Christian Heimes44720832008-05-26 13:01:01 +00003097
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003098 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00003099
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003100 if (from_len == 0) {
3101 /* insert the 'to' string everywhere. */
3102 /* >>> "Python".replace("", ".") */
3103 /* '.P.y.t.h.o.n.' */
3104 return replace_interleave(self, to_s, to_len, maxcount);
3105 }
Christian Heimes44720832008-05-26 13:01:01 +00003106
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003107 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3108 /* point for an empty self string to generate a non-empty string */
3109 /* Special case so the remaining code always gets a non-empty string */
3110 if (PyString_GET_SIZE(self) == 0) {
3111 return return_self(self);
3112 }
Christian Heimes44720832008-05-26 13:01:01 +00003113
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003114 if (to_len == 0) {
3115 /* delete all occurances of 'from' string */
3116 if (from_len == 1) {
3117 return replace_delete_single_character(
3118 self, from_s[0], maxcount);
3119 } else {
3120 return replace_delete_substring(self, from_s, from_len, maxcount);
3121 }
3122 }
Christian Heimes44720832008-05-26 13:01:01 +00003123
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003124 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00003125
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003126 if (from_len == to_len) {
3127 if (from_len == 1) {
3128 return replace_single_character_in_place(
3129 self,
3130 from_s[0],
3131 to_s[0],
3132 maxcount);
3133 } else {
3134 return replace_substring_in_place(
3135 self, from_s, from_len, to_s, to_len, maxcount);
3136 }
3137 }
Christian Heimes44720832008-05-26 13:01:01 +00003138
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003139 /* Otherwise use the more generic algorithms */
3140 if (from_len == 1) {
3141 return replace_single_character(self, from_s[0],
3142 to_s, to_len, maxcount);
3143 } else {
3144 /* len('from')>=2, len('to')>=1 */
3145 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3146 }
Christian Heimes44720832008-05-26 13:01:01 +00003147}
3148
3149PyDoc_STRVAR(replace__doc__,
Ezio Melotti6327bf12010-06-26 18:47:01 +00003150"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003151\n\
3152Return a copy of string S with all occurrences of substring\n\
3153old replaced by new. If the optional argument count is\n\
3154given, only the first count occurrences are replaced.");
3155
3156static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003157string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003158{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003159 Py_ssize_t count = -1;
3160 PyObject *from, *to;
3161 const char *from_s, *to_s;
3162 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00003163
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003164 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3165 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003166
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003167 if (PyString_Check(from)) {
3168 from_s = PyString_AS_STRING(from);
3169 from_len = PyString_GET_SIZE(from);
3170 }
Christian Heimes44720832008-05-26 13:01:01 +00003171#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003172 if (PyUnicode_Check(from))
3173 return PyUnicode_Replace((PyObject *)self,
3174 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00003175#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003176 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3177 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003178
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003179 if (PyString_Check(to)) {
3180 to_s = PyString_AS_STRING(to);
3181 to_len = PyString_GET_SIZE(to);
3182 }
Christian Heimes44720832008-05-26 13:01:01 +00003183#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003184 else if (PyUnicode_Check(to))
3185 return PyUnicode_Replace((PyObject *)self,
3186 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00003187#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003188 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3189 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003190
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003191 return (PyObject *)replace((PyStringObject *) self,
3192 from_s, from_len,
3193 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00003194}
3195
3196/** End DALKE **/
3197
3198/* Matches the end (direction >= 0) or start (direction < 0) of self
3199 * against substr, using the start and end arguments. Returns
3200 * -1 on error, 0 if not found and 1 if found.
3201 */
3202Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003203_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003204 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00003205{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003206 Py_ssize_t len = PyString_GET_SIZE(self);
3207 Py_ssize_t slen;
3208 const char* sub;
3209 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00003210
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003211 if (PyString_Check(substr)) {
3212 sub = PyString_AS_STRING(substr);
3213 slen = PyString_GET_SIZE(substr);
3214 }
Christian Heimes44720832008-05-26 13:01:01 +00003215#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003216 else if (PyUnicode_Check(substr))
3217 return PyUnicode_Tailmatch((PyObject *)self,
3218 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00003219#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003220 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3221 return -1;
3222 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003223
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003224 string_adjust_indices(&start, &end, len);
Christian Heimes44720832008-05-26 13:01:01 +00003225
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003226 if (direction < 0) {
3227 /* startswith */
3228 if (start+slen > len)
3229 return 0;
3230 } else {
3231 /* endswith */
3232 if (end-start < slen || start > len)
3233 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003234
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003235 if (end-slen > start)
3236 start = end - slen;
3237 }
3238 if (end-start >= slen)
3239 return ! memcmp(str+start, sub, slen);
3240 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003241}
3242
3243
3244PyDoc_STRVAR(startswith__doc__,
3245"S.startswith(prefix[, start[, end]]) -> bool\n\
3246\n\
3247Return True if S starts with the specified prefix, False otherwise.\n\
3248With optional start, test S beginning at that position.\n\
3249With optional end, stop comparing S at that position.\n\
3250prefix can also be a tuple of strings to try.");
3251
3252static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003253string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003254{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003255 Py_ssize_t start = 0;
3256 Py_ssize_t end = PY_SSIZE_T_MAX;
3257 PyObject *subobj;
3258 int result;
Christian Heimes44720832008-05-26 13:01:01 +00003259
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003260 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3261 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3262 return NULL;
3263 if (PyTuple_Check(subobj)) {
3264 Py_ssize_t i;
3265 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3266 result = _string_tailmatch(self,
3267 PyTuple_GET_ITEM(subobj, i),
3268 start, end, -1);
3269 if (result == -1)
3270 return NULL;
3271 else if (result) {
3272 Py_RETURN_TRUE;
3273 }
3274 }
3275 Py_RETURN_FALSE;
3276 }
3277 result = _string_tailmatch(self, subobj, start, end, -1);
3278 if (result == -1)
3279 return NULL;
3280 else
3281 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00003282}
3283
3284
3285PyDoc_STRVAR(endswith__doc__,
3286"S.endswith(suffix[, start[, end]]) -> bool\n\
3287\n\
3288Return True if S ends with the specified suffix, False otherwise.\n\
3289With optional start, test S beginning at that position.\n\
3290With optional end, stop comparing S at that position.\n\
3291suffix can also be a tuple of strings to try.");
3292
3293static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003294string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003295{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003296 Py_ssize_t start = 0;
3297 Py_ssize_t end = PY_SSIZE_T_MAX;
3298 PyObject *subobj;
3299 int result;
Christian Heimes44720832008-05-26 13:01:01 +00003300
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003301 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3302 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3303 return NULL;
3304 if (PyTuple_Check(subobj)) {
3305 Py_ssize_t i;
3306 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3307 result = _string_tailmatch(self,
3308 PyTuple_GET_ITEM(subobj, i),
3309 start, end, +1);
3310 if (result == -1)
3311 return NULL;
3312 else if (result) {
3313 Py_RETURN_TRUE;
3314 }
3315 }
3316 Py_RETURN_FALSE;
3317 }
3318 result = _string_tailmatch(self, subobj, start, end, +1);
3319 if (result == -1)
3320 return NULL;
3321 else
3322 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00003323}
3324
3325
3326PyDoc_STRVAR(encode__doc__,
3327"S.encode([encoding[,errors]]) -> object\n\
3328\n\
3329Encodes S using the codec registered for encoding. encoding defaults\n\
3330to the default encoding. errors may be given to set a different error\n\
3331handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3332a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3333'xmlcharrefreplace' as well as any other name registered with\n\
3334codecs.register_error that is able to handle UnicodeEncodeErrors.");
3335
3336static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003337string_encode(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003338{
3339 char *encoding = NULL;
3340 char *errors = NULL;
3341 PyObject *v;
3342
3343 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003344 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003345 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003346 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003347 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003348 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003349 PyErr_Format(PyExc_TypeError,
3350 "encoder did not return a string/unicode object "
3351 "(type=%.400s)",
3352 Py_TYPE(v)->tp_name);
3353 Py_DECREF(v);
3354 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003355 }
3356 return v;
3357
3358 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003359 return NULL;
3360}
3361
Christian Heimes44720832008-05-26 13:01:01 +00003362
3363PyDoc_STRVAR(decode__doc__,
3364"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003365\n\
Christian Heimes44720832008-05-26 13:01:01 +00003366Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003367to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003368handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3369a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003370as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003371able to handle UnicodeDecodeErrors.");
3372
3373static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003374string_decode(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003375{
Christian Heimes44720832008-05-26 13:01:01 +00003376 char *encoding = NULL;
3377 char *errors = NULL;
3378 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003379
3380 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003381 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003382 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003383 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003384 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003385 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003386 PyErr_Format(PyExc_TypeError,
3387 "decoder did not return a string/unicode object "
3388 "(type=%.400s)",
3389 Py_TYPE(v)->tp_name);
3390 Py_DECREF(v);
3391 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003392 }
3393 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003394
Christian Heimes44720832008-05-26 13:01:01 +00003395 onError:
3396 return NULL;
3397}
3398
3399
3400PyDoc_STRVAR(expandtabs__doc__,
3401"S.expandtabs([tabsize]) -> string\n\
3402\n\
3403Return a copy of S where all tab characters are expanded using spaces.\n\
3404If tabsize is not given, a tab size of 8 characters is assumed.");
3405
3406static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003407string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003408{
3409 const char *e, *p, *qe;
3410 char *q;
3411 Py_ssize_t i, j, incr;
3412 PyObject *u;
3413 int tabsize = 8;
3414
3415 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003416 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003417
3418 /* First pass: determine size of output string */
3419 i = 0; /* chars up to and including most recent \n or \r */
3420 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003421 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3422 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003423 if (*p == '\t') {
3424 if (tabsize > 0) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003425 incr = tabsize - (j % tabsize);
3426 if (j > PY_SSIZE_T_MAX - incr)
3427 goto overflow1;
3428 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003429 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003430 }
3431 else {
3432 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003433 goto overflow1;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003434 j++;
3435 if (*p == '\n' || *p == '\r') {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003436 if (i > PY_SSIZE_T_MAX - j)
3437 goto overflow1;
3438 i += j;
3439 j = 0;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003440 }
3441 }
Christian Heimes44720832008-05-26 13:01:01 +00003442
3443 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003444 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003445
3446 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003447 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003448 if (!u)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003449 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003450
3451 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003452 q = PyString_AS_STRING(u); /* next output char */
3453 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003454
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003455 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003456 if (*p == '\t') {
3457 if (tabsize > 0) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003458 i = tabsize - (j % tabsize);
3459 j += i;
3460 while (i--) {
3461 if (q >= qe)
3462 goto overflow2;
3463 *q++ = ' ';
3464 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003465 }
3466 }
3467 else {
3468 if (q >= qe)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003469 goto overflow2;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003470 *q++ = *p;
3471 j++;
3472 if (*p == '\n' || *p == '\r')
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003473 j = 0;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003474 }
Christian Heimes44720832008-05-26 13:01:01 +00003475
3476 return u;
3477
3478 overflow2:
3479 Py_DECREF(u);
3480 overflow1:
3481 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3482 return NULL;
3483}
3484
3485Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003486pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003487{
3488 PyObject *u;
3489
3490 if (left < 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003491 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003492 if (right < 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003493 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003494
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003495 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003496 Py_INCREF(self);
3497 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003498 }
3499
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003500 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003501 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003502 if (u) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003503 if (left)
3504 memset(PyString_AS_STRING(u), fill, left);
3505 Py_MEMCPY(PyString_AS_STRING(u) + left,
3506 PyString_AS_STRING(self),
3507 PyString_GET_SIZE(self));
3508 if (right)
3509 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3510 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003511 }
3512
3513 return u;
3514}
3515
3516PyDoc_STRVAR(ljust__doc__,
3517"S.ljust(width[, fillchar]) -> string\n"
3518"\n"
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003519"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003520"done using the specified fill character (default is a space).");
3521
3522static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003523string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003524{
3525 Py_ssize_t width;
3526 char fillchar = ' ';
3527
3528 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003529 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003530
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003531 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003532 Py_INCREF(self);
3533 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003534 }
3535
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003536 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003537}
3538
3539
3540PyDoc_STRVAR(rjust__doc__,
3541"S.rjust(width[, fillchar]) -> string\n"
3542"\n"
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003543"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003544"done using the specified fill character (default is a space)");
3545
3546static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003547string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003548{
3549 Py_ssize_t width;
3550 char fillchar = ' ';
3551
3552 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003553 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003554
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003555 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003556 Py_INCREF(self);
3557 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003558 }
3559
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003560 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003561}
3562
3563
3564PyDoc_STRVAR(center__doc__,
3565"S.center(width[, fillchar]) -> string\n"
3566"\n"
3567"Return S centered in a string of length width. Padding is\n"
3568"done using the specified fill character (default is a space)");
3569
3570static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003571string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003572{
3573 Py_ssize_t marg, left;
3574 Py_ssize_t width;
3575 char fillchar = ' ';
3576
3577 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003578 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003579
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003580 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003581 Py_INCREF(self);
3582 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003583 }
3584
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003585 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003586 left = marg / 2 + (marg & width & 1);
3587
3588 return pad(self, left, marg - left, fillchar);
3589}
3590
3591PyDoc_STRVAR(zfill__doc__,
3592"S.zfill(width) -> string\n"
3593"\n"
3594"Pad a numeric string S with zeros on the left, to fill a field\n"
3595"of the specified width. The string S is never truncated.");
3596
3597static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003598string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003599{
3600 Py_ssize_t fill;
3601 PyObject *s;
3602 char *p;
3603 Py_ssize_t width;
3604
3605 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003606 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003607
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003608 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003609 if (PyString_CheckExact(self)) {
3610 Py_INCREF(self);
3611 return (PyObject*) self;
3612 }
3613 else
3614 return PyString_FromStringAndSize(
3615 PyString_AS_STRING(self),
3616 PyString_GET_SIZE(self)
3617 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003618 }
3619
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003620 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003621
Christian Heimes44720832008-05-26 13:01:01 +00003622 s = pad(self, fill, 0, '0');
3623
3624 if (s == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003625 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003626
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003627 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003628 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003629 /* move sign to beginning of string */
3630 p[0] = p[fill];
3631 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003632 }
3633
3634 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003635}
3636
Christian Heimes44720832008-05-26 13:01:01 +00003637PyDoc_STRVAR(isspace__doc__,
3638"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003639\n\
Christian Heimes44720832008-05-26 13:01:01 +00003640Return True if all characters in S are whitespace\n\
3641and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003642
Christian Heimes44720832008-05-26 13:01:01 +00003643static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003644string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003645{
Christian Heimes44720832008-05-26 13:01:01 +00003646 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003647 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003648 register const unsigned char *e;
3649
3650 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003651 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003652 isspace(*p))
3653 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003654
3655 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003656 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003657 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003658
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003659 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003660 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003661 if (!isspace(*p))
3662 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003663 }
Christian Heimes44720832008-05-26 13:01:01 +00003664 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003665}
3666
Christian Heimes44720832008-05-26 13:01:01 +00003667
3668PyDoc_STRVAR(isalpha__doc__,
3669"S.isalpha() -> bool\n\
3670\n\
3671Return True if all characters in S are alphabetic\n\
3672and there is at least one character in S, False otherwise.");
3673
3674static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003675string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003676{
Christian Heimes44720832008-05-26 13:01:01 +00003677 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003678 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003679 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003680
Christian Heimes44720832008-05-26 13:01:01 +00003681 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003682 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003683 isalpha(*p))
3684 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003685
3686 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003687 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003688 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003689
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003690 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003691 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003692 if (!isalpha(*p))
3693 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003694 }
Christian Heimes44720832008-05-26 13:01:01 +00003695 return PyBool_FromLong(1);
3696}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003697
Christian Heimes44720832008-05-26 13:01:01 +00003698
3699PyDoc_STRVAR(isalnum__doc__,
3700"S.isalnum() -> bool\n\
3701\n\
3702Return True if all characters in S are alphanumeric\n\
3703and there is at least one character in S, False otherwise.");
3704
3705static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003706string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003707{
3708 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003709 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003710 register const unsigned char *e;
3711
3712 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003713 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003714 isalnum(*p))
3715 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003716
3717 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003718 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003719 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003720
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003721 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003722 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003723 if (!isalnum(*p))
3724 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003725 }
3726 return PyBool_FromLong(1);
3727}
3728
3729
3730PyDoc_STRVAR(isdigit__doc__,
3731"S.isdigit() -> bool\n\
3732\n\
3733Return True if all characters in S are digits\n\
3734and there is at least one character in S, False otherwise.");
3735
3736static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003737string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003738{
3739 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003740 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003741 register const unsigned char *e;
3742
3743 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003744 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003745 isdigit(*p))
3746 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003747
3748 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003749 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003750 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003751
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003752 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003753 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003754 if (!isdigit(*p))
3755 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003756 }
3757 return PyBool_FromLong(1);
3758}
3759
3760
3761PyDoc_STRVAR(islower__doc__,
3762"S.islower() -> bool\n\
3763\n\
3764Return True if all cased characters in S are lowercase and there is\n\
3765at least one cased character in S, False otherwise.");
3766
3767static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003768string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003769{
3770 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003771 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003772 register const unsigned char *e;
3773 int cased;
3774
3775 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003776 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003777 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003778
3779 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003780 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003781 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003782
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003783 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003784 cased = 0;
3785 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003786 if (isupper(*p))
3787 return PyBool_FromLong(0);
3788 else if (!cased && islower(*p))
3789 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003790 }
3791 return PyBool_FromLong(cased);
3792}
3793
3794
3795PyDoc_STRVAR(isupper__doc__,
3796"S.isupper() -> bool\n\
3797\n\
3798Return True if all cased characters in S are uppercase and there is\n\
3799at least one cased character in S, False otherwise.");
3800
3801static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003802string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003803{
3804 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003805 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003806 register const unsigned char *e;
3807 int cased;
3808
3809 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003810 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003811 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003812
3813 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003814 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003815 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003816
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003817 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003818 cased = 0;
3819 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003820 if (islower(*p))
3821 return PyBool_FromLong(0);
3822 else if (!cased && isupper(*p))
3823 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003824 }
3825 return PyBool_FromLong(cased);
3826}
3827
3828
3829PyDoc_STRVAR(istitle__doc__,
3830"S.istitle() -> bool\n\
3831\n\
3832Return True if S is a titlecased string and there is at least one\n\
3833character in S, i.e. uppercase characters may only follow uncased\n\
3834characters and lowercase characters only cased ones. Return False\n\
3835otherwise.");
3836
3837static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003838string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003839{
3840 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003841 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003842 register const unsigned char *e;
3843 int cased, previous_is_cased;
3844
3845 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003846 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003847 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003848
3849 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003850 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003851 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003852
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003853 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003854 cased = 0;
3855 previous_is_cased = 0;
3856 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003857 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003858
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003859 if (isupper(ch)) {
3860 if (previous_is_cased)
3861 return PyBool_FromLong(0);
3862 previous_is_cased = 1;
3863 cased = 1;
3864 }
3865 else if (islower(ch)) {
3866 if (!previous_is_cased)
3867 return PyBool_FromLong(0);
3868 previous_is_cased = 1;
3869 cased = 1;
3870 }
3871 else
3872 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003873 }
3874 return PyBool_FromLong(cased);
3875}
3876
3877
3878PyDoc_STRVAR(splitlines__doc__,
3879"S.splitlines([keepends]) -> list of strings\n\
3880\n\
3881Return a list of the lines in S, breaking at line boundaries.\n\
3882Line breaks are not included in the resulting list unless keepends\n\
3883is given and true.");
3884
3885static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003886string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003887{
3888 register Py_ssize_t i;
3889 register Py_ssize_t j;
3890 Py_ssize_t len;
3891 int keepends = 0;
3892 PyObject *list;
3893 PyObject *str;
3894 char *data;
3895
3896 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003897 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003898
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003899 data = PyString_AS_STRING(self);
3900 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003901
3902 /* This does not use the preallocated list because splitlines is
3903 usually run with hundreds of newlines. The overhead of
3904 switching between PyList_SET_ITEM and append causes about a
3905 2-3% slowdown for that common case. A smarter implementation
3906 could move the if check out, so the SET_ITEMs are done first
3907 and the appends only done when the prealloc buffer is full.
3908 That's too much work for little gain.*/
3909
3910 list = PyList_New(0);
3911 if (!list)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003912 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +00003913
3914 for (i = j = 0; i < len; ) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003915 Py_ssize_t eol;
Christian Heimes44720832008-05-26 13:01:01 +00003916
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003917 /* Find a line and append it */
3918 while (i < len && data[i] != '\n' && data[i] != '\r')
3919 i++;
Christian Heimes44720832008-05-26 13:01:01 +00003920
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003921 /* Skip the line break reading CRLF as one line break */
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003922 eol = i;
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003923 if (i < len) {
3924 if (data[i] == '\r' && i + 1 < len &&
3925 data[i+1] == '\n')
3926 i += 2;
3927 else
3928 i++;
3929 if (keepends)
3930 eol = i;
3931 }
3932 SPLIT_APPEND(data, j, eol);
3933 j = i;
Christian Heimes44720832008-05-26 13:01:01 +00003934 }
3935 if (j < len) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003936 SPLIT_APPEND(data, j, len);
Christian Heimes44720832008-05-26 13:01:01 +00003937 }
3938
3939 return list;
3940
3941 onError:
3942 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003943 return NULL;
3944}
3945
Robert Schuppenies51df0642008-06-01 16:16:17 +00003946PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003947"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003948
3949static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003950string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003951{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003952 Py_ssize_t res;
3953 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
3954 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003955}
3956
Christian Heimes44720832008-05-26 13:01:01 +00003957#undef SPLIT_APPEND
3958#undef SPLIT_ADD
3959#undef MAX_PREALLOC
3960#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003961
3962static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003963string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003964{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003965 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003966}
3967
Christian Heimes1a6387e2008-03-26 12:49:49 +00003968
Christian Heimes44720832008-05-26 13:01:01 +00003969#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003970
Christian Heimes44720832008-05-26 13:01:01 +00003971PyDoc_STRVAR(format__doc__,
Georg Brandlc5356992010-08-01 22:02:09 +00003972"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003973\n\
3974");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003975
Eric Smithdc13b792008-05-30 18:10:04 +00003976static PyObject *
3977string__format__(PyObject* self, PyObject* args)
3978{
3979 PyObject *format_spec;
3980 PyObject *result = NULL;
3981 PyObject *tmp = NULL;
3982
3983 /* If 2.x, convert format_spec to the same type as value */
3984 /* This is to allow things like u''.format('') */
3985 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003986 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003987 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003988 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3989 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3990 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003991 }
3992 tmp = PyObject_Str(format_spec);
3993 if (tmp == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003994 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003995 format_spec = tmp;
3996
3997 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003998 PyString_AS_STRING(format_spec),
3999 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00004000done:
4001 Py_XDECREF(tmp);
4002 return result;
4003}
4004
Christian Heimes44720832008-05-26 13:01:01 +00004005PyDoc_STRVAR(p_format__doc__,
Georg Brandlc5356992010-08-01 22:02:09 +00004006"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00004007\n\
4008");
4009
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00004010
Christian Heimes1a6387e2008-03-26 12:49:49 +00004011static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00004012string_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004013 /* Counterparts of the obsolete stropmodule functions; except
4014 string.maketrans(). */
4015 {"join", (PyCFunction)string_join, METH_O, join__doc__},
4016 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
4017 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
4018 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
4019 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
4020 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
4021 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
4022 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
4023 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
4024 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
4025 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
4026 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
4027 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
4028 capitalize__doc__},
4029 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
4030 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
4031 endswith__doc__},
4032 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
4033 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
4034 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
4035 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
4036 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
4037 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
4038 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
4039 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
4040 {"rpartition", (PyCFunction)string_rpartition, METH_O,
4041 rpartition__doc__},
4042 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
4043 startswith__doc__},
4044 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
4045 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
4046 swapcase__doc__},
4047 {"translate", (PyCFunction)string_translate, METH_VARARGS,
4048 translate__doc__},
4049 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
4050 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
4051 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
4052 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
4053 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
4054 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
4055 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
4056 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
4057 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
4058 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
4059 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
4060 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
4061 expandtabs__doc__},
4062 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
4063 splitlines__doc__},
4064 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
4065 sizeof__doc__},
4066 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
4067 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00004068};
4069
4070static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004071str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004072
Christian Heimes44720832008-05-26 13:01:01 +00004073static PyObject *
4074string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4075{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004076 PyObject *x = NULL;
4077 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00004078
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004079 if (type != &PyString_Type)
4080 return str_subtype_new(type, args, kwds);
4081 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4082 return NULL;
4083 if (x == NULL)
4084 return PyString_FromString("");
4085 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00004086}
4087
4088static PyObject *
4089str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4090{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004091 PyObject *tmp, *pnew;
4092 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00004093
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004094 assert(PyType_IsSubtype(type, &PyString_Type));
4095 tmp = string_new(&PyString_Type, args, kwds);
4096 if (tmp == NULL)
4097 return NULL;
4098 assert(PyString_CheckExact(tmp));
4099 n = PyString_GET_SIZE(tmp);
4100 pnew = type->tp_alloc(type, n);
4101 if (pnew != NULL) {
4102 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4103 ((PyStringObject *)pnew)->ob_shash =
4104 ((PyStringObject *)tmp)->ob_shash;
4105 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4106 }
4107 Py_DECREF(tmp);
4108 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00004109}
4110
4111static PyObject *
4112basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4113{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004114 PyErr_SetString(PyExc_TypeError,
4115 "The basestring type cannot be instantiated");
4116 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004117}
4118
4119static PyObject *
4120string_mod(PyObject *v, PyObject *w)
4121{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004122 if (!PyString_Check(v)) {
4123 Py_INCREF(Py_NotImplemented);
4124 return Py_NotImplemented;
4125 }
4126 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004127}
4128
4129PyDoc_STRVAR(basestring_doc,
4130"Type basestring cannot be instantiated; it is the base for str and unicode.");
4131
4132static PyNumberMethods string_as_number = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004133 0, /*nb_add*/
4134 0, /*nb_subtract*/
4135 0, /*nb_multiply*/
4136 0, /*nb_divide*/
4137 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00004138};
4139
4140
4141PyTypeObject PyBaseString_Type = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004142 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4143 "basestring",
4144 0,
4145 0,
4146 0, /* tp_dealloc */
4147 0, /* tp_print */
4148 0, /* tp_getattr */
4149 0, /* tp_setattr */
4150 0, /* tp_compare */
4151 0, /* tp_repr */
4152 0, /* tp_as_number */
4153 0, /* tp_as_sequence */
4154 0, /* tp_as_mapping */
4155 0, /* tp_hash */
4156 0, /* tp_call */
4157 0, /* tp_str */
4158 0, /* tp_getattro */
4159 0, /* tp_setattro */
4160 0, /* tp_as_buffer */
4161 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
4162 basestring_doc, /* tp_doc */
4163 0, /* tp_traverse */
4164 0, /* tp_clear */
4165 0, /* tp_richcompare */
4166 0, /* tp_weaklistoffset */
4167 0, /* tp_iter */
4168 0, /* tp_iternext */
4169 0, /* tp_methods */
4170 0, /* tp_members */
4171 0, /* tp_getset */
4172 &PyBaseObject_Type, /* tp_base */
4173 0, /* tp_dict */
4174 0, /* tp_descr_get */
4175 0, /* tp_descr_set */
4176 0, /* tp_dictoffset */
4177 0, /* tp_init */
4178 0, /* tp_alloc */
4179 basestring_new, /* tp_new */
4180 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00004181};
4182
4183PyDoc_STRVAR(string_doc,
4184"str(object) -> string\n\
4185\n\
4186Return a nice string representation of the object.\n\
4187If the argument is a string, the return value is the same object.");
4188
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004189PyTypeObject PyString_Type = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004190 PyVarObject_HEAD_INIT(&PyType_Type, 0)
4191 "str",
4192 sizeof(PyStringObject),
4193 sizeof(char),
4194 string_dealloc, /* tp_dealloc */
4195 (printfunc)string_print, /* tp_print */
4196 0, /* tp_getattr */
4197 0, /* tp_setattr */
4198 0, /* tp_compare */
4199 string_repr, /* tp_repr */
4200 &string_as_number, /* tp_as_number */
4201 &string_as_sequence, /* tp_as_sequence */
4202 &string_as_mapping, /* tp_as_mapping */
4203 (hashfunc)string_hash, /* tp_hash */
4204 0, /* tp_call */
4205 string_str, /* tp_str */
4206 PyObject_GenericGetAttr, /* tp_getattro */
4207 0, /* tp_setattro */
4208 &string_as_buffer, /* tp_as_buffer */
4209 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
4210 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
4211 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
4212 string_doc, /* tp_doc */
4213 0, /* tp_traverse */
4214 0, /* tp_clear */
4215 (richcmpfunc)string_richcompare, /* tp_richcompare */
4216 0, /* tp_weaklistoffset */
4217 0, /* tp_iter */
4218 0, /* tp_iternext */
4219 string_methods, /* tp_methods */
4220 0, /* tp_members */
4221 0, /* tp_getset */
4222 &PyBaseString_Type, /* tp_base */
4223 0, /* tp_dict */
4224 0, /* tp_descr_get */
4225 0, /* tp_descr_set */
4226 0, /* tp_dictoffset */
4227 0, /* tp_init */
4228 0, /* tp_alloc */
4229 string_new, /* tp_new */
4230 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00004231};
4232
4233void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004234PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004235{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004236 register PyObject *v;
4237 if (*pv == NULL)
4238 return;
4239 if (w == NULL || !PyString_Check(*pv)) {
4240 Py_DECREF(*pv);
4241 *pv = NULL;
4242 return;
4243 }
4244 v = string_concat((PyStringObject *) *pv, w);
4245 Py_DECREF(*pv);
4246 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00004247}
4248
4249void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004250PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004251{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004252 PyString_Concat(pv, w);
4253 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00004254}
4255
4256
4257/* The following function breaks the notion that strings are immutable:
4258 it changes the size of a string. We get away with this only if there
4259 is only one module referencing the object. You can also think of it
4260 as creating a new string object and destroying the old one, only
4261 more efficiently. In any case, don't use this if the string may
4262 already be known to some other part of the code...
4263 Note that if there's not enough memory to resize the string, the original
4264 string object at *pv is deallocated, *pv is set to NULL, an "out of
4265 memory" exception is set, and -1 is returned. Else (on success) 0 is
4266 returned, and the value in *pv may or may not be the same as on input.
4267 As always, an extra byte is allocated for a trailing \0 byte (newsize
4268 does *not* include that), and a trailing \0 byte is stored.
4269*/
4270
4271int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004272_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00004273{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004274 register PyObject *v;
4275 register PyStringObject *sv;
4276 v = *pv;
4277 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
4278 PyString_CHECK_INTERNED(v)) {
4279 *pv = 0;
4280 Py_DECREF(v);
4281 PyErr_BadInternalCall();
4282 return -1;
4283 }
4284 /* XXX UNREF/NEWREF interface should be more symmetrical */
4285 _Py_DEC_REFTOTAL;
4286 _Py_ForgetReference(v);
4287 *pv = (PyObject *)
4288 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4289 if (*pv == NULL) {
4290 PyObject_Del(v);
4291 PyErr_NoMemory();
4292 return -1;
4293 }
4294 _Py_NewReference(*pv);
4295 sv = (PyStringObject *) *pv;
4296 Py_SIZE(sv) = newsize;
4297 sv->ob_sval[newsize] = '\0';
4298 sv->ob_shash = -1; /* invalidate cached hash value */
4299 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00004300}
4301
4302/* Helpers for formatstring */
4303
4304Py_LOCAL_INLINE(PyObject *)
4305getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4306{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004307 Py_ssize_t argidx = *p_argidx;
4308 if (argidx < arglen) {
4309 (*p_argidx)++;
4310 if (arglen < 0)
4311 return args;
4312 else
4313 return PyTuple_GetItem(args, argidx);
4314 }
4315 PyErr_SetString(PyExc_TypeError,
4316 "not enough arguments for format string");
4317 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004318}
4319
/* Bit flags recording which flag characters appeared in a % format
 * specifier.  Each flag occupies its own bit so they can be OR-ed.
 */
#define F_LJUST (1<<0)  /* '-' */
#define F_SIGN  (1<<1)  /* '+' */
#define F_BLANK (1<<2)  /* ' ' */
#define F_ALT   (1<<3)  /* '#' */
#define F_ZERO  (1<<4)  /* '0' */
Christian Heimes44720832008-05-26 13:01:01 +00004332
4333Py_LOCAL_INLINE(int)
4334formatfloat(char *buf, size_t buflen, int flags,
4335 int prec, int type, PyObject *v)
4336{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004337 /* fmt = '%#.' + `prec` + `type`
4338 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4339 char fmt[20];
4340 double x;
4341 x = PyFloat_AsDouble(v);
4342 if (x == -1.0 && PyErr_Occurred()) {
4343 PyErr_Format(PyExc_TypeError, "float argument required, "
4344 "not %.200s", Py_TYPE(v)->tp_name);
4345 return -1;
4346 }
4347 if (prec < 0)
4348 prec = 6;
Mark Dickinson75be68b2009-08-28 20:57:42 +00004349#if SIZEOF_INT > 4
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004350 /* make sure that the decimal representation of precision really does
4351 need at most 10 digits: platforms with sizeof(int) == 8 exist! */
4352 if (prec > 0x7fffffff) {
4353 PyErr_SetString(PyExc_OverflowError,
4354 "outrageously large precision "
4355 "for formatted float");
4356 return -1;
4357 }
Mark Dickinson75be68b2009-08-28 20:57:42 +00004358#endif
Mark Dickinson87886192009-03-29 16:18:33 +00004359
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004360 if (type == 'f' && fabs(x) >= 1e50)
4361 type = 'g';
4362 /* Worst case length calc to ensure no buffer overrun:
Christian Heimes44720832008-05-26 13:01:01 +00004363
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004364 'g' formats:
4365 fmt = %#.<prec>g
4366 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4367 for any double rep.)
4368 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Christian Heimes44720832008-05-26 13:01:01 +00004369
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004370 'f' formats:
4371 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4372 len = 1 + 50 + 1 + prec = 52 + prec
Christian Heimes44720832008-05-26 13:01:01 +00004373
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004374 If prec=0 the effective precision is 1 (the leading digit is
4375 always given), therefore increase the length by one.
Christian Heimes44720832008-05-26 13:01:01 +00004376
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004377 */
4378 if (((type == 'g' || type == 'G') &&
4379 buflen <= (size_t)10 + (size_t)prec) ||
4380 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4381 PyErr_SetString(PyExc_OverflowError,
4382 "formatted float is too long (precision too large?)");
4383 return -1;
4384 }
4385 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4386 (flags&F_ALT) ? "#" : "",
4387 prec, type);
4388 PyOS_ascii_formatd(buf, buflen, fmt, x);
4389 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004390}
4391
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004392/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004393 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4394 * Python's regular ints.
4395 * Return value: a new PyString*, or NULL if error.
4396 * . *pbuf is set to point into it,
4397 * *plen set to the # of chars following that.
4398 * Caller must decref it when done using pbuf.
4399 * The string starting at *pbuf is of the form
4400 * "-"? ("0x" | "0X")? digit+
4401 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4402 * set in flags. The case of hex digits will be correct,
4403 * There will be at least prec digits, zero-filled on the left if
4404 * necessary to get that many.
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004405 * val object to be converted
4406 * flags bitmask of format flags; only F_ALT is looked at
4407 * prec minimum number of digits; 0-fill on left if needed
4408 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00004409 *
4410 * CAUTION: o, x and X conversions on regular ints can never
4411 * produce a '-' sign, but can for Python's unbounded ints.
4412 */
4413PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004414_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004415 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004416{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004417 PyObject *result = NULL;
4418 char *buf;
4419 Py_ssize_t i;
4420 int sign; /* 1 if '-', else 0 */
4421 int len; /* number of characters */
4422 Py_ssize_t llen;
4423 int numdigits; /* len == numnondigits + numdigits */
4424 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004425
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004426 switch (type) {
4427 case 'd':
4428 case 'u':
4429 result = Py_TYPE(val)->tp_str(val);
4430 break;
4431 case 'o':
4432 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4433 break;
4434 case 'x':
4435 case 'X':
4436 numnondigits = 2;
4437 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4438 break;
4439 default:
4440 assert(!"'type' not in [duoxX]");
4441 }
4442 if (!result)
4443 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004444
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004445 buf = PyString_AsString(result);
4446 if (!buf) {
4447 Py_DECREF(result);
4448 return NULL;
4449 }
Christian Heimes44720832008-05-26 13:01:01 +00004450
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004451 /* To modify the string in-place, there can only be one reference. */
4452 if (Py_REFCNT(result) != 1) {
4453 PyErr_BadInternalCall();
4454 return NULL;
4455 }
4456 llen = PyString_Size(result);
4457 if (llen > INT_MAX) {
4458 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4459 return NULL;
4460 }
4461 len = (int)llen;
4462 if (buf[len-1] == 'L') {
4463 --len;
4464 buf[len] = '\0';
4465 }
4466 sign = buf[0] == '-';
4467 numnondigits += sign;
4468 numdigits = len - numnondigits;
4469 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004470
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004471 /* Get rid of base marker unless F_ALT */
4472 if ((flags & F_ALT) == 0) {
4473 /* Need to skip 0x, 0X or 0. */
4474 int skipped = 0;
4475 switch (type) {
4476 case 'o':
4477 assert(buf[sign] == '0');
4478 /* If 0 is only digit, leave it alone. */
4479 if (numdigits > 1) {
4480 skipped = 1;
4481 --numdigits;
4482 }
4483 break;
4484 case 'x':
4485 case 'X':
4486 assert(buf[sign] == '0');
4487 assert(buf[sign + 1] == 'x');
4488 skipped = 2;
4489 numnondigits -= 2;
4490 break;
4491 }
4492 if (skipped) {
4493 buf += skipped;
4494 len -= skipped;
4495 if (sign)
4496 buf[0] = '-';
4497 }
4498 assert(len == numnondigits + numdigits);
4499 assert(numdigits > 0);
4500 }
Christian Heimes44720832008-05-26 13:01:01 +00004501
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004502 /* Fill with leading zeroes to meet minimum width. */
4503 if (prec > numdigits) {
4504 PyObject *r1 = PyString_FromStringAndSize(NULL,
4505 numnondigits + prec);
4506 char *b1;
4507 if (!r1) {
4508 Py_DECREF(result);
4509 return NULL;
4510 }
4511 b1 = PyString_AS_STRING(r1);
4512 for (i = 0; i < numnondigits; ++i)
4513 *b1++ = *buf++;
4514 for (i = 0; i < prec - numdigits; i++)
4515 *b1++ = '0';
4516 for (i = 0; i < numdigits; i++)
4517 *b1++ = *buf++;
4518 *b1 = '\0';
4519 Py_DECREF(result);
4520 result = r1;
4521 buf = PyString_AS_STRING(result);
4522 len = numnondigits + prec;
4523 }
Christian Heimes44720832008-05-26 13:01:01 +00004524
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004525 /* Fix up case for hex conversions. */
4526 if (type == 'X') {
4527 /* Need to convert all lower case letters to upper case.
4528 and need to convert 0x to 0X (and -0x to -0X). */
4529 for (i = 0; i < len; i++)
4530 if (buf[i] >= 'a' && buf[i] <= 'x')
4531 buf[i] -= 'a'-'A';
4532 }
4533 *pbuf = buf;
4534 *plen = len;
4535 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004536}
4537
4538Py_LOCAL_INLINE(int)
4539formatint(char *buf, size_t buflen, int flags,
4540 int prec, int type, PyObject *v)
4541{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004542 /* fmt = '%#.' + `prec` + 'l' + `type`
4543 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4544 + 1 + 1 = 24 */
4545 char fmt[64]; /* plenty big enough! */
4546 char *sign;
4547 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004548
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004549 x = PyInt_AsLong(v);
4550 if (x == -1 && PyErr_Occurred()) {
4551 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4552 Py_TYPE(v)->tp_name);
4553 return -1;
4554 }
4555 if (x < 0 && type == 'u') {
4556 type = 'd';
4557 }
4558 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4559 sign = "-";
4560 else
4561 sign = "";
4562 if (prec < 0)
4563 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004564
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004565 if ((flags & F_ALT) &&
4566 (type == 'x' || type == 'X')) {
4567 /* When converting under %#x or %#X, there are a number
4568 * of issues that cause pain:
4569 * - when 0 is being converted, the C standard leaves off
4570 * the '0x' or '0X', which is inconsistent with other
4571 * %#x/%#X conversions and inconsistent with Python's
4572 * hex() function
4573 * - there are platforms that violate the standard and
4574 * convert 0 with the '0x' or '0X'
4575 * (Metrowerks, Compaq Tru64)
4576 * - there are platforms that give '0x' when converting
4577 * under %#X, but convert 0 in accordance with the
4578 * standard (OS/2 EMX)
4579 *
4580 * We can achieve the desired consistency by inserting our
4581 * own '0x' or '0X' prefix, and substituting %x/%X in place
4582 * of %#x/%#X.
4583 *
4584 * Note that this is the same approach as used in
4585 * formatint() in unicodeobject.c
4586 */
4587 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4588 sign, type, prec, type);
4589 }
4590 else {
4591 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4592 sign, (flags&F_ALT) ? "#" : "",
4593 prec, type);
4594 }
Christian Heimes44720832008-05-26 13:01:01 +00004595
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004596 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4597 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4598 */
4599 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4600 PyErr_SetString(PyExc_OverflowError,
4601 "formatted integer is too long (precision too large?)");
4602 return -1;
4603 }
4604 if (sign[0])
4605 PyOS_snprintf(buf, buflen, fmt, -x);
4606 else
4607 PyOS_snprintf(buf, buflen, fmt, x);
4608 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004609}
4610
4611Py_LOCAL_INLINE(int)
4612formatchar(char *buf, size_t buflen, PyObject *v)
4613{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004614 /* presume that the buffer is at least 2 characters long */
4615 if (PyString_Check(v)) {
4616 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4617 return -1;
4618 }
4619 else {
4620 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4621 return -1;
4622 }
4623 buf[1] = '\0';
4624 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004625}
4626
4627/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4628
4629 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4630 chars are formatted. XXX This is a magic number. Each formatting
4631 routine does bounds checking to ensure no overflow, but a better
4632 solution may be to malloc a buffer of appropriate size for each
4633 format. For now, the current solution is sufficient.
4634*/
4635#define FORMATBUFLEN (size_t)120
4636
4637PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004638PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004639{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004640 char *fmt, *res;
4641 Py_ssize_t arglen, argidx;
4642 Py_ssize_t reslen, rescnt, fmtcnt;
4643 int args_owned = 0;
4644 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004645#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004646 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004647#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004648 PyObject *dict = NULL;
4649 if (format == NULL || !PyString_Check(format) || args == NULL) {
4650 PyErr_BadInternalCall();
4651 return NULL;
4652 }
4653 orig_args = args;
4654 fmt = PyString_AS_STRING(format);
4655 fmtcnt = PyString_GET_SIZE(format);
4656 reslen = rescnt = fmtcnt + 100;
4657 result = PyString_FromStringAndSize((char *)NULL, reslen);
4658 if (result == NULL)
4659 return NULL;
4660 res = PyString_AsString(result);
4661 if (PyTuple_Check(args)) {
4662 arglen = PyTuple_GET_SIZE(args);
4663 argidx = 0;
4664 }
4665 else {
4666 arglen = -1;
4667 argidx = -2;
4668 }
4669 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4670 !PyObject_TypeCheck(args, &PyBaseString_Type))
4671 dict = args;
4672 while (--fmtcnt >= 0) {
4673 if (*fmt != '%') {
4674 if (--rescnt < 0) {
4675 rescnt = fmtcnt + 100;
4676 reslen += rescnt;
4677 if (_PyString_Resize(&result, reslen) < 0)
4678 return NULL;
4679 res = PyString_AS_STRING(result)
4680 + reslen - rescnt;
4681 --rescnt;
4682 }
4683 *res++ = *fmt++;
4684 }
4685 else {
4686 /* Got a format specifier */
4687 int flags = 0;
4688 Py_ssize_t width = -1;
4689 int prec = -1;
4690 int c = '\0';
4691 int fill;
4692 int isnumok;
4693 PyObject *v = NULL;
4694 PyObject *temp = NULL;
4695 char *pbuf;
4696 int sign;
4697 Py_ssize_t len;
4698 char formatbuf[FORMATBUFLEN];
4699 /* For format{float,int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004700#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004701 char *fmt_start = fmt;
4702 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004703#endif
4704
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004705 fmt++;
4706 if (*fmt == '(') {
4707 char *keystart;
4708 Py_ssize_t keylen;
4709 PyObject *key;
4710 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004711
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004712 if (dict == NULL) {
4713 PyErr_SetString(PyExc_TypeError,
4714 "format requires a mapping");
4715 goto error;
4716 }
4717 ++fmt;
4718 --fmtcnt;
4719 keystart = fmt;
4720 /* Skip over balanced parentheses */
4721 while (pcount > 0 && --fmtcnt >= 0) {
4722 if (*fmt == ')')
4723 --pcount;
4724 else if (*fmt == '(')
4725 ++pcount;
4726 fmt++;
4727 }
4728 keylen = fmt - keystart - 1;
4729 if (fmtcnt < 0 || pcount > 0) {
4730 PyErr_SetString(PyExc_ValueError,
4731 "incomplete format key");
4732 goto error;
4733 }
4734 key = PyString_FromStringAndSize(keystart,
4735 keylen);
4736 if (key == NULL)
4737 goto error;
4738 if (args_owned) {
4739 Py_DECREF(args);
4740 args_owned = 0;
4741 }
4742 args = PyObject_GetItem(dict, key);
4743 Py_DECREF(key);
4744 if (args == NULL) {
4745 goto error;
4746 }
4747 args_owned = 1;
4748 arglen = -1;
4749 argidx = -2;
4750 }
4751 while (--fmtcnt >= 0) {
4752 switch (c = *fmt++) {
4753 case '-': flags |= F_LJUST; continue;
4754 case '+': flags |= F_SIGN; continue;
4755 case ' ': flags |= F_BLANK; continue;
4756 case '#': flags |= F_ALT; continue;
4757 case '0': flags |= F_ZERO; continue;
4758 }
4759 break;
4760 }
4761 if (c == '*') {
4762 v = getnextarg(args, arglen, &argidx);
4763 if (v == NULL)
4764 goto error;
4765 if (!PyInt_Check(v)) {
4766 PyErr_SetString(PyExc_TypeError,
4767 "* wants int");
4768 goto error;
4769 }
4770 width = PyInt_AsLong(v);
4771 if (width < 0) {
4772 flags |= F_LJUST;
4773 width = -width;
4774 }
4775 if (--fmtcnt >= 0)
4776 c = *fmt++;
4777 }
4778 else if (c >= 0 && isdigit(c)) {
4779 width = c - '0';
4780 while (--fmtcnt >= 0) {
4781 c = Py_CHARMASK(*fmt++);
4782 if (!isdigit(c))
4783 break;
4784 if ((width*10) / 10 != width) {
4785 PyErr_SetString(
4786 PyExc_ValueError,
4787 "width too big");
4788 goto error;
4789 }
4790 width = width*10 + (c - '0');
4791 }
4792 }
4793 if (c == '.') {
4794 prec = 0;
4795 if (--fmtcnt >= 0)
4796 c = *fmt++;
4797 if (c == '*') {
4798 v = getnextarg(args, arglen, &argidx);
4799 if (v == NULL)
4800 goto error;
4801 if (!PyInt_Check(v)) {
4802 PyErr_SetString(
4803 PyExc_TypeError,
4804 "* wants int");
4805 goto error;
4806 }
4807 prec = PyInt_AsLong(v);
4808 if (prec < 0)
4809 prec = 0;
4810 if (--fmtcnt >= 0)
4811 c = *fmt++;
4812 }
4813 else if (c >= 0 && isdigit(c)) {
4814 prec = c - '0';
4815 while (--fmtcnt >= 0) {
4816 c = Py_CHARMASK(*fmt++);
4817 if (!isdigit(c))
4818 break;
4819 if ((prec*10) / 10 != prec) {
4820 PyErr_SetString(
4821 PyExc_ValueError,
4822 "prec too big");
4823 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004824 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004825 prec = prec*10 + (c - '0');
4826 }
4827 }
4828 } /* prec */
4829 if (fmtcnt >= 0) {
4830 if (c == 'h' || c == 'l' || c == 'L') {
4831 if (--fmtcnt >= 0)
4832 c = *fmt++;
4833 }
4834 }
4835 if (fmtcnt < 0) {
4836 PyErr_SetString(PyExc_ValueError,
4837 "incomplete format");
4838 goto error;
4839 }
4840 if (c != '%') {
4841 v = getnextarg(args, arglen, &argidx);
4842 if (v == NULL)
4843 goto error;
4844 }
4845 sign = 0;
4846 fill = ' ';
4847 switch (c) {
4848 case '%':
4849 pbuf = "%";
4850 len = 1;
4851 break;
4852 case 's':
4853#ifdef Py_USING_UNICODE
4854 if (PyUnicode_Check(v)) {
4855 fmt = fmt_start;
4856 argidx = argidx_start;
4857 goto unicode;
4858 }
4859#endif
4860 temp = _PyObject_Str(v);
4861#ifdef Py_USING_UNICODE
4862 if (temp != NULL && PyUnicode_Check(temp)) {
4863 Py_DECREF(temp);
4864 fmt = fmt_start;
4865 argidx = argidx_start;
4866 goto unicode;
4867 }
4868#endif
4869 /* Fall through */
4870 case 'r':
4871 if (c == 'r')
4872 temp = PyObject_Repr(v);
4873 if (temp == NULL)
4874 goto error;
4875 if (!PyString_Check(temp)) {
4876 PyErr_SetString(PyExc_TypeError,
4877 "%s argument has non-string str()");
4878 Py_DECREF(temp);
4879 goto error;
4880 }
4881 pbuf = PyString_AS_STRING(temp);
4882 len = PyString_GET_SIZE(temp);
4883 if (prec >= 0 && len > prec)
4884 len = prec;
4885 break;
4886 case 'i':
4887 case 'd':
4888 case 'u':
4889 case 'o':
4890 case 'x':
4891 case 'X':
4892 if (c == 'i')
4893 c = 'd';
4894 isnumok = 0;
4895 if (PyNumber_Check(v)) {
4896 PyObject *iobj=NULL;
4897
4898 if (PyInt_Check(v) || (PyLong_Check(v))) {
4899 iobj = v;
4900 Py_INCREF(iobj);
4901 }
4902 else {
4903 iobj = PyNumber_Int(v);
4904 if (iobj==NULL) iobj = PyNumber_Long(v);
4905 }
4906 if (iobj!=NULL) {
4907 if (PyInt_Check(iobj)) {
4908 isnumok = 1;
4909 pbuf = formatbuf;
4910 len = formatint(pbuf,
4911 sizeof(formatbuf),
4912 flags, prec, c, iobj);
4913 Py_DECREF(iobj);
4914 if (len < 0)
4915 goto error;
4916 sign = 1;
4917 }
4918 else if (PyLong_Check(iobj)) {
4919 int ilen;
4920
4921 isnumok = 1;
4922 temp = _PyString_FormatLong(iobj, flags,
4923 prec, c, &pbuf, &ilen);
4924 Py_DECREF(iobj);
4925 len = ilen;
4926 if (!temp)
4927 goto error;
4928 sign = 1;
4929 }
4930 else {
4931 Py_DECREF(iobj);
4932 }
4933 }
4934 }
4935 if (!isnumok) {
4936 PyErr_Format(PyExc_TypeError,
4937 "%%%c format: a number is required, "
4938 "not %.200s", c, Py_TYPE(v)->tp_name);
4939 goto error;
4940 }
4941 if (flags & F_ZERO)
4942 fill = '0';
4943 break;
4944 case 'e':
4945 case 'E':
4946 case 'f':
4947 case 'F':
4948 case 'g':
4949 case 'G':
4950 if (c == 'F')
4951 c = 'f';
4952 pbuf = formatbuf;
4953 len = formatfloat(pbuf, sizeof(formatbuf),
4954 flags, prec, c, v);
4955 if (len < 0)
4956 goto error;
4957 sign = 1;
4958 if (flags & F_ZERO)
4959 fill = '0';
4960 break;
4961 case 'c':
4962#ifdef Py_USING_UNICODE
4963 if (PyUnicode_Check(v)) {
4964 fmt = fmt_start;
4965 argidx = argidx_start;
4966 goto unicode;
4967 }
4968#endif
4969 pbuf = formatbuf;
4970 len = formatchar(pbuf, sizeof(formatbuf), v);
4971 if (len < 0)
4972 goto error;
4973 break;
4974 default:
4975 PyErr_Format(PyExc_ValueError,
4976 "unsupported format character '%c' (0x%x) "
4977 "at index %zd",
4978 c, c,
4979 (Py_ssize_t)(fmt - 1 -
4980 PyString_AsString(format)));
4981 goto error;
4982 }
4983 if (sign) {
4984 if (*pbuf == '-' || *pbuf == '+') {
4985 sign = *pbuf++;
4986 len--;
4987 }
4988 else if (flags & F_SIGN)
4989 sign = '+';
4990 else if (flags & F_BLANK)
4991 sign = ' ';
4992 else
4993 sign = 0;
4994 }
4995 if (width < len)
4996 width = len;
4997 if (rescnt - (sign != 0) < width) {
4998 reslen -= rescnt;
4999 rescnt = width + fmtcnt + 100;
5000 reslen += rescnt;
5001 if (reslen < 0) {
5002 Py_DECREF(result);
5003 Py_XDECREF(temp);
5004 return PyErr_NoMemory();
5005 }
5006 if (_PyString_Resize(&result, reslen) < 0) {
5007 Py_XDECREF(temp);
5008 return NULL;
5009 }
5010 res = PyString_AS_STRING(result)
5011 + reslen - rescnt;
5012 }
5013 if (sign) {
5014 if (fill != ' ')
5015 *res++ = sign;
5016 rescnt--;
5017 if (width > len)
5018 width--;
5019 }
5020 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5021 assert(pbuf[0] == '0');
5022 assert(pbuf[1] == c);
5023 if (fill != ' ') {
5024 *res++ = *pbuf++;
5025 *res++ = *pbuf++;
5026 }
5027 rescnt -= 2;
5028 width -= 2;
5029 if (width < 0)
5030 width = 0;
5031 len -= 2;
5032 }
5033 if (width > len && !(flags & F_LJUST)) {
5034 do {
5035 --rescnt;
5036 *res++ = fill;
5037 } while (--width > len);
5038 }
5039 if (fill == ' ') {
5040 if (sign)
5041 *res++ = sign;
5042 if ((flags & F_ALT) &&
5043 (c == 'x' || c == 'X')) {
5044 assert(pbuf[0] == '0');
5045 assert(pbuf[1] == c);
5046 *res++ = *pbuf++;
5047 *res++ = *pbuf++;
5048 }
5049 }
5050 Py_MEMCPY(res, pbuf, len);
5051 res += len;
5052 rescnt -= len;
5053 while (--width >= len) {
5054 --rescnt;
5055 *res++ = ' ';
5056 }
5057 if (dict && (argidx < arglen) && c != '%') {
5058 PyErr_SetString(PyExc_TypeError,
5059 "not all arguments converted during string formatting");
5060 Py_XDECREF(temp);
5061 goto error;
5062 }
5063 Py_XDECREF(temp);
5064 } /* '%' */
5065 } /* until end */
5066 if (argidx < arglen && !dict) {
5067 PyErr_SetString(PyExc_TypeError,
5068 "not all arguments converted during string formatting");
5069 goto error;
5070 }
5071 if (args_owned) {
5072 Py_DECREF(args);
5073 }
5074 _PyString_Resize(&result, reslen - rescnt);
5075 return result;
Christian Heimes44720832008-05-26 13:01:01 +00005076
5077#ifdef Py_USING_UNICODE
5078 unicode:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005079 if (args_owned) {
5080 Py_DECREF(args);
5081 args_owned = 0;
5082 }
5083 /* Fiddle args right (remove the first argidx arguments) */
5084 if (PyTuple_Check(orig_args) && argidx > 0) {
5085 PyObject *v;
5086 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5087 v = PyTuple_New(n);
5088 if (v == NULL)
5089 goto error;
5090 while (--n >= 0) {
5091 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5092 Py_INCREF(w);
5093 PyTuple_SET_ITEM(v, n, w);
5094 }
5095 args = v;
5096 } else {
5097 Py_INCREF(orig_args);
5098 args = orig_args;
5099 }
5100 args_owned = 1;
5101 /* Take what we have of the result and let the Unicode formatting
5102 function format the rest of the input. */
5103 rescnt = res - PyString_AS_STRING(result);
5104 if (_PyString_Resize(&result, rescnt))
5105 goto error;
5106 fmtcnt = PyString_GET_SIZE(format) - \
5107 (fmt - PyString_AS_STRING(format));
5108 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5109 if (format == NULL)
5110 goto error;
5111 v = PyUnicode_Format(format, args);
5112 Py_DECREF(format);
5113 if (v == NULL)
5114 goto error;
5115 /* Paste what we have (result) to what the Unicode formatting
5116 function returned (v) and return the result (or error) */
5117 w = PyUnicode_Concat(result, v);
5118 Py_DECREF(result);
5119 Py_DECREF(v);
5120 Py_DECREF(args);
5121 return w;
Christian Heimes44720832008-05-26 13:01:01 +00005122#endif /* Py_USING_UNICODE */
5123
5124 error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005125 Py_DECREF(result);
5126 if (args_owned) {
5127 Py_DECREF(args);
5128 }
5129 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00005130}
5131
5132void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005133PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005134{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005135 register PyStringObject *s = (PyStringObject *)(*p);
5136 PyObject *t;
5137 if (s == NULL || !PyString_Check(s))
5138 Py_FatalError("PyString_InternInPlace: strings only please!");
5139 /* If it's a string subclass, we don't really know what putting
5140 it in the interned dict might do. */
5141 if (!PyString_CheckExact(s))
5142 return;
5143 if (PyString_CHECK_INTERNED(s))
5144 return;
5145 if (interned == NULL) {
5146 interned = PyDict_New();
5147 if (interned == NULL) {
5148 PyErr_Clear(); /* Don't leave an exception */
5149 return;
5150 }
5151 }
5152 t = PyDict_GetItem(interned, (PyObject *)s);
5153 if (t) {
5154 Py_INCREF(t);
5155 Py_DECREF(*p);
5156 *p = t;
5157 return;
5158 }
Christian Heimes44720832008-05-26 13:01:01 +00005159
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005160 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5161 PyErr_Clear();
5162 return;
5163 }
5164 /* The two references in interned are not counted by refcnt.
5165 The string deallocator will take care of this */
5166 Py_REFCNT(s) -= 2;
5167 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005168}
5169
5170void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005171PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005172{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005173 PyString_InternInPlace(p);
5174 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5175 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5176 Py_INCREF(*p);
5177 }
Christian Heimes44720832008-05-26 13:01:01 +00005178}
5179
5180
5181PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005182PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005183{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005184 PyObject *s = PyString_FromString(cp);
5185 if (s == NULL)
5186 return NULL;
5187 PyString_InternInPlace(&s);
5188 return s;
Christian Heimes44720832008-05-26 13:01:01 +00005189}
5190
5191void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005192PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005193{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005194 int i;
5195 for (i = 0; i < UCHAR_MAX + 1; i++) {
5196 Py_XDECREF(characters[i]);
5197 characters[i] = NULL;
5198 }
5199 Py_XDECREF(nullstring);
5200 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00005201}
5202
5203void _Py_ReleaseInternedStrings(void)
5204{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005205 PyObject *keys;
5206 PyStringObject *s;
5207 Py_ssize_t i, n;
5208 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00005209
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005210 if (interned == NULL || !PyDict_Check(interned))
5211 return;
5212 keys = PyDict_Keys(interned);
5213 if (keys == NULL || !PyList_Check(keys)) {
5214 PyErr_Clear();
5215 return;
5216 }
Christian Heimes44720832008-05-26 13:01:01 +00005217
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005218 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5219 detector, interned strings are not forcibly deallocated; rather, we
5220 give them their stolen references back, and then clear and DECREF
5221 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00005222
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005223 n = PyList_GET_SIZE(keys);
5224 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5225 n);
5226 for (i = 0; i < n; i++) {
5227 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5228 switch (s->ob_sstate) {
5229 case SSTATE_NOT_INTERNED:
5230 /* XXX Shouldn't happen */
5231 break;
5232 case SSTATE_INTERNED_IMMORTAL:
5233 Py_REFCNT(s) += 1;
5234 immortal_size += Py_SIZE(s);
5235 break;
5236 case SSTATE_INTERNED_MORTAL:
5237 Py_REFCNT(s) += 2;
5238 mortal_size += Py_SIZE(s);
5239 break;
5240 default:
5241 Py_FatalError("Inconsistent interned string state.");
5242 }
5243 s->ob_sstate = SSTATE_NOT_INTERNED;
5244 }
5245 fprintf(stderr, "total size of all interned strings: "
5246 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5247 "mortal/immortal\n", mortal_size, immortal_size);
5248 Py_DECREF(keys);
5249 PyDict_Clear(interned);
5250 Py_DECREF(interned);
5251 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005252}