blob: 9d058662bcd3d2d500cc37c95c2fd6051c7cc10d [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
7
8#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000012static PyStringObject *characters[UCHAR_MAX + 1];
13static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000014
15/* This dictionary holds all interned strings. Note that references to
16 strings in this dictionary are *not* counted in the string's ob_refcnt.
17 When the interned string reaches a refcnt of 0 the string deallocation
18 function will delete the reference from this dictionary.
19
   Another way to look at this is to say that the actual reference
21 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
22*/
23static PyObject *interned;
24
25/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
Christian Heimes44720832008-05-26 13:01:01 +000027 parameter `size' denotes number of characters to allocate, not counting any
28 null terminating character.
29
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000031 string containing exactly `size' bytes.
32
   For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000034 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000036 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000037 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000039 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
43
44 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000048 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000050*/
51PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000053{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000054 register PyStringObject *op;
55 if (size < 0) {
56 PyErr_SetString(PyExc_SystemError,
57 "Negative size passed to PyString_FromStringAndSize");
58 return NULL;
59 }
60 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000061#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000062 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000063#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000064 Py_INCREF(op);
65 return (PyObject *)op;
66 }
67 if (size == 1 && str != NULL &&
68 (op = characters[*str & UCHAR_MAX]) != NULL)
69 {
Christian Heimes44720832008-05-26 13:01:01 +000070#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000071 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000072#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000073 Py_INCREF(op);
74 return (PyObject *)op;
75 }
Christian Heimes44720832008-05-26 13:01:01 +000076
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000077 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
78 PyErr_SetString(PyExc_OverflowError, "string is too large");
79 return NULL;
80 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000081
Antoine Pitrouc7c96a92010-05-09 15:15:40 +000082 /* Inline PyObject_NewVar */
83 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
84 if (op == NULL)
85 return PyErr_NoMemory();
86 PyObject_INIT_VAR(op, &PyString_Type, size);
87 op->ob_shash = -1;
88 op->ob_sstate = SSTATE_NOT_INTERNED;
89 if (str != NULL)
90 Py_MEMCPY(op->ob_sval, str, size);
91 op->ob_sval[size] = '\0';
92 /* share short strings */
93 if (size == 0) {
94 PyObject *t = (PyObject *)op;
95 PyString_InternInPlace(&t);
96 op = (PyStringObject *)t;
97 nullstring = op;
98 Py_INCREF(op);
99 } else if (size == 1 && str != NULL) {
100 PyObject *t = (PyObject *)op;
101 PyString_InternInPlace(&t);
102 op = (PyStringObject *)t;
103 characters[*str & UCHAR_MAX] = op;
104 Py_INCREF(op);
105 }
106 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000107}
108
Christian Heimes44720832008-05-26 13:01:01 +0000109PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000110PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000111{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000112 register size_t size;
113 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000114
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000115 assert(str != NULL);
116 size = strlen(str);
117 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
118 PyErr_SetString(PyExc_OverflowError,
119 "string is too long for a Python string");
120 return NULL;
121 }
122 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000123#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000124 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000125#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000126 Py_INCREF(op);
127 return (PyObject *)op;
128 }
129 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000130#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000131 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000132#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000133 Py_INCREF(op);
134 return (PyObject *)op;
135 }
Christian Heimes44720832008-05-26 13:01:01 +0000136
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000137 /* Inline PyObject_NewVar */
138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
139 if (op == NULL)
140 return PyErr_NoMemory();
141 PyObject_INIT_VAR(op, &PyString_Type, size);
142 op->ob_shash = -1;
143 op->ob_sstate = SSTATE_NOT_INTERNED;
144 Py_MEMCPY(op->ob_sval, str, size+1);
145 /* share short strings */
146 if (size == 0) {
147 PyObject *t = (PyObject *)op;
148 PyString_InternInPlace(&t);
149 op = (PyStringObject *)t;
150 nullstring = op;
151 Py_INCREF(op);
152 } else if (size == 1) {
153 PyObject *t = (PyObject *)op;
154 PyString_InternInPlace(&t);
155 op = (PyStringObject *)t;
156 characters[*str & UCHAR_MAX] = op;
157 Py_INCREF(op);
158 }
159 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000160}
161
Christian Heimes44720832008-05-26 13:01:01 +0000162PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000163PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000164{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000165 va_list count;
166 Py_ssize_t n = 0;
167 const char* f;
168 char *s;
169 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000170
Christian Heimes44720832008-05-26 13:01:01 +0000171#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000172 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000173#else
174#ifdef __va_copy
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000175 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000176#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000177 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000178#endif
179#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000180 /* step 1: figure out how large a buffer we need */
181 for (f = format; *f; f++) {
182 if (*f == '%') {
183 const char* p = f;
184 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
185 ;
Christian Heimes44720832008-05-26 13:01:01 +0000186
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
188 * they don't affect the amount of space we reserve.
189 */
190 if ((*f == 'l' || *f == 'z') &&
191 (f[1] == 'd' || f[1] == 'u'))
192 ++f;
Christian Heimes44720832008-05-26 13:01:01 +0000193
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000194 switch (*f) {
195 case 'c':
196 (void)va_arg(count, int);
197 /* fall through... */
198 case '%':
199 n++;
200 break;
201 case 'd': case 'u': case 'i': case 'x':
202 (void) va_arg(count, int);
203 /* 20 bytes is enough to hold a 64-bit
204 integer. Decimal takes the most space.
205 This isn't enough for octal. */
206 n += 20;
207 break;
208 case 's':
209 s = va_arg(count, char*);
210 n += strlen(s);
211 break;
212 case 'p':
213 (void) va_arg(count, int);
214 /* maximum 64-bit pointer representation:
215 * 0xffffffffffffffff
216 * so 19 characters is enough.
217 * XXX I count 18 -- what's the extra for?
218 */
219 n += 19;
220 break;
221 default:
222 /* if we stumble upon an unknown
223 formatting code, copy the rest of
224 the format string to the output
225 string. (we cannot just skip the
226 code, since there's no way to know
227 what's in the argument list) */
228 n += strlen(p);
229 goto expand;
230 }
231 } else
232 n++;
233 }
Christian Heimes44720832008-05-26 13:01:01 +0000234 expand:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000235 /* step 2: fill the buffer */
236 /* Since we've analyzed how much space we need for the worst case,
237 use sprintf directly instead of the slower PyOS_snprintf. */
238 string = PyString_FromStringAndSize(NULL, n);
239 if (!string)
240 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000241
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000242 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000243
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000244 for (f = format; *f; f++) {
245 if (*f == '%') {
246 const char* p = f++;
247 Py_ssize_t i;
248 int longflag = 0;
249 int size_tflag = 0;
250 /* parse the width.precision part (we're only
251 interested in the precision value, if any) */
252 n = 0;
253 while (isdigit(Py_CHARMASK(*f)))
254 n = (n*10) + *f++ - '0';
255 if (*f == '.') {
256 f++;
257 n = 0;
258 while (isdigit(Py_CHARMASK(*f)))
259 n = (n*10) + *f++ - '0';
260 }
261 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
262 f++;
263 /* handle the long flag, but only for %ld and %lu.
264 others can be added when necessary. */
265 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
266 longflag = 1;
267 ++f;
268 }
269 /* handle the size_t flag. */
270 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
271 size_tflag = 1;
272 ++f;
273 }
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000275 switch (*f) {
276 case 'c':
277 *s++ = va_arg(vargs, int);
278 break;
279 case 'd':
280 if (longflag)
281 sprintf(s, "%ld", va_arg(vargs, long));
282 else if (size_tflag)
283 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
284 va_arg(vargs, Py_ssize_t));
285 else
286 sprintf(s, "%d", va_arg(vargs, int));
287 s += strlen(s);
288 break;
289 case 'u':
290 if (longflag)
291 sprintf(s, "%lu",
292 va_arg(vargs, unsigned long));
293 else if (size_tflag)
294 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
295 va_arg(vargs, size_t));
296 else
297 sprintf(s, "%u",
298 va_arg(vargs, unsigned int));
299 s += strlen(s);
300 break;
301 case 'i':
302 sprintf(s, "%i", va_arg(vargs, int));
303 s += strlen(s);
304 break;
305 case 'x':
306 sprintf(s, "%x", va_arg(vargs, int));
307 s += strlen(s);
308 break;
309 case 's':
310 p = va_arg(vargs, char*);
311 i = strlen(p);
312 if (n > 0 && i > n)
313 i = n;
314 Py_MEMCPY(s, p, i);
315 s += i;
316 break;
317 case 'p':
318 sprintf(s, "%p", va_arg(vargs, void*));
319 /* %p is ill-defined: ensure leading 0x. */
320 if (s[1] == 'X')
321 s[1] = 'x';
322 else if (s[1] != 'x') {
323 memmove(s+2, s, strlen(s)+1);
324 s[0] = '0';
325 s[1] = 'x';
326 }
327 s += strlen(s);
328 break;
329 case '%':
330 *s++ = '%';
331 break;
332 default:
333 strcpy(s, p);
334 s += strlen(s);
335 goto end;
336 }
337 } else
338 *s++ = *f;
339 }
Christian Heimes44720832008-05-26 13:01:01 +0000340
341 end:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000342 _PyString_Resize(&string, s - PyString_AS_STRING(string));
343 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000344}
345
346PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000347PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000348{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000349 PyObject* ret;
350 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000351
352#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000353 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000354#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000355 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000356#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000357 ret = PyString_FromFormatV(format, vargs);
358 va_end(vargs);
359 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000360}
361
362
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000363PyObject *PyString_Decode(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000364 Py_ssize_t size,
365 const char *encoding,
366 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000367{
368 PyObject *v, *str;
369
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000370 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000371 if (str == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000372 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000373 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000374 Py_DECREF(str);
375 return v;
376}
377
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000378PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000379 const char *encoding,
380 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000381{
382 PyObject *v;
383
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000384 if (!PyString_Check(str)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000385 PyErr_BadArgument();
386 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000387 }
388
Christian Heimes44720832008-05-26 13:01:01 +0000389 if (encoding == NULL) {
390#ifdef Py_USING_UNICODE
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000391 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000392#else
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000393 PyErr_SetString(PyExc_ValueError, "no encoding specified");
394 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000395#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000396 }
Christian Heimes44720832008-05-26 13:01:01 +0000397
398 /* Decode via the codec registry */
399 v = PyCodec_Decode(str, encoding, errors);
400 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000401 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000402
403 return v;
404
405 onError:
406 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000407}
408
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000409PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000410 const char *encoding,
411 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000412{
Christian Heimes44720832008-05-26 13:01:01 +0000413 PyObject *v;
414
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000415 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000416 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000417 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000418
419#ifdef Py_USING_UNICODE
420 /* Convert Unicode to a string using the default encoding */
421 if (PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000422 PyObject *temp = v;
423 v = PyUnicode_AsEncodedString(v, NULL, NULL);
424 Py_DECREF(temp);
425 if (v == NULL)
426 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000427 }
Christian Heimes44720832008-05-26 13:01:01 +0000428#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000429 if (!PyString_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000430 PyErr_Format(PyExc_TypeError,
431 "decoder did not return a string object (type=%.400s)",
432 Py_TYPE(v)->tp_name);
433 Py_DECREF(v);
434 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000435 }
Christian Heimes44720832008-05-26 13:01:01 +0000436
437 return v;
438
439 onError:
440 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000441}
442
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000443PyObject *PyString_Encode(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000444 Py_ssize_t size,
445 const char *encoding,
446 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000447{
Christian Heimes44720832008-05-26 13:01:01 +0000448 PyObject *v, *str;
449
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000450 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000451 if (str == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000452 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000453 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000454 Py_DECREF(str);
455 return v;
456}
457
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000458PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000459 const char *encoding,
460 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000461{
462 PyObject *v;
463
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000464 if (!PyString_Check(str)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000465 PyErr_BadArgument();
466 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000467 }
468
469 if (encoding == NULL) {
470#ifdef Py_USING_UNICODE
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000471 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000472#else
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000473 PyErr_SetString(PyExc_ValueError, "no encoding specified");
474 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000475#endif
476 }
477
478 /* Encode via the codec registry */
479 v = PyCodec_Encode(str, encoding, errors);
480 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000481 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000482
483 return v;
484
485 onError:
486 return NULL;
487}
488
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000489PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000490 const char *encoding,
491 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000492{
493 PyObject *v;
494
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000495 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000496 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000497 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000498
499#ifdef Py_USING_UNICODE
500 /* Convert Unicode to a string using the default encoding */
501 if (PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000502 PyObject *temp = v;
503 v = PyUnicode_AsEncodedString(v, NULL, NULL);
504 Py_DECREF(temp);
505 if (v == NULL)
506 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000507 }
508#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000509 if (!PyString_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +0000510 PyErr_Format(PyExc_TypeError,
511 "encoder did not return a string object (type=%.400s)",
512 Py_TYPE(v)->tp_name);
513 Py_DECREF(v);
514 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000515 }
516
517 return v;
518
519 onError:
520 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000521}
522
523static void
Christian Heimes44720832008-05-26 13:01:01 +0000524string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000525{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000526 switch (PyString_CHECK_INTERNED(op)) {
527 case SSTATE_NOT_INTERNED:
528 break;
Christian Heimes44720832008-05-26 13:01:01 +0000529
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000530 case SSTATE_INTERNED_MORTAL:
531 /* revive dead object temporarily for DelItem */
532 Py_REFCNT(op) = 3;
533 if (PyDict_DelItem(interned, op) != 0)
534 Py_FatalError(
535 "deletion of interned string failed");
536 break;
Christian Heimes44720832008-05-26 13:01:01 +0000537
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000538 case SSTATE_INTERNED_IMMORTAL:
539 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000540
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000541 default:
542 Py_FatalError("Inconsistent interned string state.");
543 }
544 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000545}
546
Christian Heimes44720832008-05-26 13:01:01 +0000547/* Unescape a backslash-escaped string. If unicode is non-zero,
548 the string is a u-literal. If recode_encoding is non-zero,
549 the string is UTF-8 encoded and should be re-encoded in the
550 specified encoding. */
551
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000552PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000553 Py_ssize_t len,
554 const char *errors,
555 Py_ssize_t unicode,
556 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000557{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000558 int c;
559 char *p, *buf;
560 const char *end;
561 PyObject *v;
562 Py_ssize_t newlen = recode_encoding ? 4*len:len;
563 v = PyString_FromStringAndSize((char *)NULL, newlen);
564 if (v == NULL)
565 return NULL;
566 p = buf = PyString_AsString(v);
567 end = s + len;
568 while (s < end) {
569 if (*s != '\\') {
570 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000571#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000572 if (recode_encoding && (*s & 0x80)) {
573 PyObject *u, *w;
574 char *r;
575 const char* t;
576 Py_ssize_t rn;
577 t = s;
578 /* Decode non-ASCII bytes as UTF-8. */
579 while (t < end && (*t & 0x80)) t++;
580 u = PyUnicode_DecodeUTF8(s, t - s, errors);
581 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000583 /* Recode them in target encoding. */
584 w = PyUnicode_AsEncodedString(
585 u, recode_encoding, errors);
586 Py_DECREF(u);
587 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000588
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000589 /* Append bytes to output buffer. */
590 assert(PyString_Check(w));
591 r = PyString_AS_STRING(w);
592 rn = PyString_GET_SIZE(w);
593 Py_MEMCPY(p, r, rn);
594 p += rn;
595 Py_DECREF(w);
596 s = t;
597 } else {
598 *p++ = *s++;
599 }
Christian Heimes44720832008-05-26 13:01:01 +0000600#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000601 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000602#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000603 continue;
604 }
605 s++;
606 if (s==end) {
607 PyErr_SetString(PyExc_ValueError,
608 "Trailing \\ in string");
609 goto failed;
610 }
611 switch (*s++) {
612 /* XXX This assumes ASCII! */
613 case '\n': break;
614 case '\\': *p++ = '\\'; break;
615 case '\'': *p++ = '\''; break;
616 case '\"': *p++ = '\"'; break;
617 case 'b': *p++ = '\b'; break;
618 case 'f': *p++ = '\014'; break; /* FF */
619 case 't': *p++ = '\t'; break;
620 case 'n': *p++ = '\n'; break;
621 case 'r': *p++ = '\r'; break;
622 case 'v': *p++ = '\013'; break; /* VT */
623 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
624 case '0': case '1': case '2': case '3':
625 case '4': case '5': case '6': case '7':
626 c = s[-1] - '0';
627 if (s < end && '0' <= *s && *s <= '7') {
628 c = (c<<3) + *s++ - '0';
629 if (s < end && '0' <= *s && *s <= '7')
630 c = (c<<3) + *s++ - '0';
631 }
632 *p++ = c;
633 break;
634 case 'x':
635 if (s+1 < end &&
636 isxdigit(Py_CHARMASK(s[0])) &&
637 isxdigit(Py_CHARMASK(s[1])))
638 {
639 unsigned int x = 0;
640 c = Py_CHARMASK(*s);
641 s++;
642 if (isdigit(c))
643 x = c - '0';
644 else if (islower(c))
645 x = 10 + c - 'a';
646 else
647 x = 10 + c - 'A';
648 x = x << 4;
649 c = Py_CHARMASK(*s);
650 s++;
651 if (isdigit(c))
652 x += c - '0';
653 else if (islower(c))
654 x += 10 + c - 'a';
655 else
656 x += 10 + c - 'A';
657 *p++ = x;
658 break;
659 }
660 if (!errors || strcmp(errors, "strict") == 0) {
661 PyErr_SetString(PyExc_ValueError,
662 "invalid \\x escape");
663 goto failed;
664 }
665 if (strcmp(errors, "replace") == 0) {
666 *p++ = '?';
667 } else if (strcmp(errors, "ignore") == 0)
668 /* do nothing */;
669 else {
670 PyErr_Format(PyExc_ValueError,
671 "decoding error; "
672 "unknown error handling code: %.400s",
673 errors);
674 goto failed;
675 }
Christian Heimes44720832008-05-26 13:01:01 +0000676#ifndef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000677 case 'u':
678 case 'U':
679 case 'N':
680 if (unicode) {
681 PyErr_SetString(PyExc_ValueError,
682 "Unicode escapes not legal "
683 "when Unicode disabled");
684 goto failed;
685 }
Christian Heimes44720832008-05-26 13:01:01 +0000686#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000687 default:
688 *p++ = '\\';
689 s--;
690 goto non_esc; /* an arbitry number of unescaped
691 UTF-8 bytes may follow. */
692 }
693 }
694 if (p-buf < newlen)
695 _PyString_Resize(&v, p - buf);
696 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000697 failed:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000698 Py_DECREF(v);
699 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000700}
701
702/* -------------------------------------------------------------------- */
703/* object api */
704
Christian Heimes1a6387e2008-03-26 12:49:49 +0000705static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000706string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000707{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000708 char *s;
709 Py_ssize_t len;
710 if (PyString_AsStringAndSize(op, &s, &len))
711 return -1;
712 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000713}
714
Christian Heimes44720832008-05-26 13:01:01 +0000715static /*const*/ char *
716string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000717{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000718 char *s;
719 Py_ssize_t len;
720 if (PyString_AsStringAndSize(op, &s, &len))
721 return NULL;
722 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000723}
724
725Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000726PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000727{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000728 if (!PyString_Check(op))
729 return string_getsize(op);
730 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000731}
732
Christian Heimes44720832008-05-26 13:01:01 +0000733/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000734PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000735{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000736 if (!PyString_Check(op))
737 return string_getbuffer(op);
738 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000739}
740
741int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000742PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000743 register char **s,
744 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000745{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000746 if (s == NULL) {
747 PyErr_BadInternalCall();
748 return -1;
749 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000750
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000751 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000752#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000753 if (PyUnicode_Check(obj)) {
754 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
755 if (obj == NULL)
756 return -1;
757 }
758 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000759#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000760 {
761 PyErr_Format(PyExc_TypeError,
762 "expected string or Unicode object, "
763 "%.200s found", Py_TYPE(obj)->tp_name);
764 return -1;
765 }
766 }
Christian Heimes44720832008-05-26 13:01:01 +0000767
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000768 *s = PyString_AS_STRING(obj);
769 if (len != NULL)
770 *len = PyString_GET_SIZE(obj);
771 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
772 PyErr_SetString(PyExc_TypeError,
773 "expected string without null bytes");
774 return -1;
775 }
776 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000777}
778
Christian Heimes1a6387e2008-03-26 12:49:49 +0000779/* -------------------------------------------------------------------- */
780/* Methods */
781
Christian Heimes44720832008-05-26 13:01:01 +0000782#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000783#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000784
Christian Heimes1a6387e2008-03-26 12:49:49 +0000785#include "stringlib/count.h"
786#include "stringlib/find.h"
787#include "stringlib/partition.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000789#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000790#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000791
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792
793
794static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000795string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000797 Py_ssize_t i, str_len;
798 char c;
799 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000800
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000801 /* XXX Ought to check for interrupts when writing long strings */
802 if (! PyString_CheckExact(op)) {
803 int ret;
804 /* A str subclass may have its own __str__ method. */
805 op = (PyStringObject *) PyObject_Str((PyObject *)op);
806 if (op == NULL)
807 return -1;
808 ret = string_print(op, fp, flags);
809 Py_DECREF(op);
810 return ret;
811 }
812 if (flags & Py_PRINT_RAW) {
813 char *data = op->ob_sval;
814 Py_ssize_t size = Py_SIZE(op);
815 Py_BEGIN_ALLOW_THREADS
816 while (size > INT_MAX) {
817 /* Very long strings cannot be written atomically.
818 * But don't write exactly INT_MAX bytes at a time
819 * to avoid memory aligment issues.
820 */
821 const int chunk_size = INT_MAX & ~0x3FFF;
822 fwrite(data, 1, chunk_size, fp);
823 data += chunk_size;
824 size -= chunk_size;
825 }
Christian Heimes44720832008-05-26 13:01:01 +0000826#ifdef __VMS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000827 if (size) fwrite(data, (int)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000828#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000829 fwrite(data, 1, (int)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000830#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000831 Py_END_ALLOW_THREADS
832 return 0;
833 }
Christian Heimes44720832008-05-26 13:01:01 +0000834
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000835 /* figure out which quote to use; single is preferred */
836 quote = '\'';
837 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
838 !memchr(op->ob_sval, '"', Py_SIZE(op)))
839 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000840
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000841 str_len = Py_SIZE(op);
842 Py_BEGIN_ALLOW_THREADS
843 fputc(quote, fp);
844 for (i = 0; i < str_len; i++) {
845 /* Since strings are immutable and the caller should have a
846 reference, accessing the interal buffer should not be an issue
847 with the GIL released. */
848 c = op->ob_sval[i];
849 if (c == quote || c == '\\')
850 fprintf(fp, "\\%c", c);
851 else if (c == '\t')
852 fprintf(fp, "\\t");
853 else if (c == '\n')
854 fprintf(fp, "\\n");
855 else if (c == '\r')
856 fprintf(fp, "\\r");
857 else if (c < ' ' || c >= 0x7f)
858 fprintf(fp, "\\x%02x", c & 0xff);
859 else
860 fputc(c, fp);
861 }
862 fputc(quote, fp);
863 Py_END_ALLOW_THREADS
864 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000865}
866
Christian Heimes44720832008-05-26 13:01:01 +0000867PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000868PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000869{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000870 register PyStringObject* op = (PyStringObject*) obj;
871 size_t newsize = 2 + 4 * Py_SIZE(op);
872 PyObject *v;
873 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) {
874 PyErr_SetString(PyExc_OverflowError,
875 "string is too large to make repr");
876 return NULL;
877 }
878 v = PyString_FromStringAndSize((char *)NULL, newsize);
879 if (v == NULL) {
880 return NULL;
881 }
882 else {
883 register Py_ssize_t i;
884 register char c;
885 register char *p;
886 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000887
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000888 /* figure out which quote to use; single is preferred */
889 quote = '\'';
890 if (smartquotes &&
891 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
892 !memchr(op->ob_sval, '"', Py_SIZE(op)))
893 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000894
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000895 p = PyString_AS_STRING(v);
896 *p++ = quote;
897 for (i = 0; i < Py_SIZE(op); i++) {
898 /* There's at least enough room for a hex escape
899 and a closing quote. */
900 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
901 c = op->ob_sval[i];
902 if (c == quote || c == '\\')
903 *p++ = '\\', *p++ = c;
904 else if (c == '\t')
905 *p++ = '\\', *p++ = 't';
906 else if (c == '\n')
907 *p++ = '\\', *p++ = 'n';
908 else if (c == '\r')
909 *p++ = '\\', *p++ = 'r';
910 else if (c < ' ' || c >= 0x7f) {
911 /* For performance, we don't want to call
912 PyOS_snprintf here (extra layers of
913 function call). */
914 sprintf(p, "\\x%02x", c & 0xff);
915 p += 4;
916 }
917 else
918 *p++ = c;
919 }
920 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
921 *p++ = quote;
922 *p = '\0';
923 _PyString_Resize(
924 &v, (p - PyString_AS_STRING(v)));
925 return v;
926 }
Christian Heimes44720832008-05-26 13:01:01 +0000927}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000928
929static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000930string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000931{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000932 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000933}
934
Christian Heimes1a6387e2008-03-26 12:49:49 +0000935static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000936string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000937{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000938 assert(PyString_Check(s));
939 if (PyString_CheckExact(s)) {
940 Py_INCREF(s);
941 return s;
942 }
943 else {
944 /* Subtype -- return genuine string with the same value. */
945 PyStringObject *t = (PyStringObject *) s;
946 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
947 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000948}
949
Christian Heimes44720832008-05-26 13:01:01 +0000950static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000951string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +0000952{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000953 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +0000954}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000955
Christian Heimes44720832008-05-26 13:01:01 +0000956static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000957string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +0000958{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000959 register Py_ssize_t size;
960 register PyStringObject *op;
961 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +0000962#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000963 if (PyUnicode_Check(bb))
964 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +0000965#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000966 if (PyByteArray_Check(bb))
967 return PyByteArray_Concat((PyObject *)a, bb);
968 PyErr_Format(PyExc_TypeError,
969 "cannot concatenate 'str' and '%.200s' objects",
970 Py_TYPE(bb)->tp_name);
971 return NULL;
972 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000973#define b ((PyStringObject *)bb)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000974 /* Optimize cases with empty left or right operand */
975 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
976 PyString_CheckExact(a) && PyString_CheckExact(b)) {
977 if (Py_SIZE(a) == 0) {
978 Py_INCREF(bb);
979 return bb;
980 }
981 Py_INCREF(a);
982 return (PyObject *)a;
983 }
984 size = Py_SIZE(a) + Py_SIZE(b);
985 /* Check that string sizes are not negative, to prevent an
986 overflow in cases where we are passed incorrectly-created
987 strings with negative lengths (due to a bug in other code).
988 */
989 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
990 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
991 PyErr_SetString(PyExc_OverflowError,
992 "strings are too large to concat");
993 return NULL;
994 }
995
996 /* Inline PyObject_NewVar */
997 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {
998 PyErr_SetString(PyExc_OverflowError,
999 "strings are too large to concat");
1000 return NULL;
1001 }
1002 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
1003 if (op == NULL)
1004 return PyErr_NoMemory();
1005 PyObject_INIT_VAR(op, &PyString_Type, size);
1006 op->ob_shash = -1;
1007 op->ob_sstate = SSTATE_NOT_INTERNED;
1008 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1009 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1010 op->ob_sval[size] = '\0';
1011 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001012#undef b
1013}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001014
Christian Heimes44720832008-05-26 13:01:01 +00001015static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001016string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001017{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001018 register Py_ssize_t i;
1019 register Py_ssize_t j;
1020 register Py_ssize_t size;
1021 register PyStringObject *op;
1022 size_t nbytes;
1023 if (n < 0)
1024 n = 0;
1025 /* watch out for overflows: the size can overflow int,
1026 * and the # of bytes needed can overflow size_t
1027 */
1028 size = Py_SIZE(a) * n;
1029 if (n && size / n != Py_SIZE(a)) {
1030 PyErr_SetString(PyExc_OverflowError,
1031 "repeated string is too long");
1032 return NULL;
1033 }
1034 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1035 Py_INCREF(a);
1036 return (PyObject *)a;
1037 }
1038 nbytes = (size_t)size;
1039 if (nbytes + sizeof(PyStringObject) <= nbytes) {
1040 PyErr_SetString(PyExc_OverflowError,
1041 "repeated string is too long");
1042 return NULL;
1043 }
1044 op = (PyStringObject *)
1045 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1046 if (op == NULL)
1047 return PyErr_NoMemory();
1048 PyObject_INIT_VAR(op, &PyString_Type, size);
1049 op->ob_shash = -1;
1050 op->ob_sstate = SSTATE_NOT_INTERNED;
1051 op->ob_sval[size] = '\0';
1052 if (Py_SIZE(a) == 1 && n > 0) {
1053 memset(op->ob_sval, a->ob_sval[0] , n);
1054 return (PyObject *) op;
1055 }
1056 i = 0;
1057 if (i < size) {
1058 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1059 i = Py_SIZE(a);
1060 }
1061 while (i < size) {
1062 j = (i <= size-i) ? i : size-i;
1063 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1064 i += j;
1065 }
1066 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001067}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001068
Christian Heimes44720832008-05-26 13:01:01 +00001069/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1070
1071static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001072string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001073 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001074 /* j -- may be negative! */
1075{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001076 if (i < 0)
1077 i = 0;
1078 if (j < 0)
1079 j = 0; /* Avoid signed/unsigned bug in next line */
1080 if (j > Py_SIZE(a))
1081 j = Py_SIZE(a);
1082 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1083 /* It's the same as a */
1084 Py_INCREF(a);
1085 return (PyObject *)a;
1086 }
1087 if (j < i)
1088 j = i;
1089 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001090}
1091
1092static int
1093string_contains(PyObject *str_obj, PyObject *sub_obj)
1094{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001095 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001096#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001097 if (PyUnicode_Check(sub_obj))
1098 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001099#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001100 if (!PyString_Check(sub_obj)) {
1101 PyErr_Format(PyExc_TypeError,
1102 "'in <string>' requires string as left operand, "
1103 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1104 return -1;
1105 }
1106 }
Christian Heimes44720832008-05-26 13:01:01 +00001107
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001108 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001109}
1110
1111static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001112string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001113{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001114 char pchar;
1115 PyObject *v;
1116 if (i < 0 || i >= Py_SIZE(a)) {
1117 PyErr_SetString(PyExc_IndexError, "string index out of range");
1118 return NULL;
1119 }
1120 pchar = a->ob_sval[i];
1121 v = (PyObject *)characters[pchar & UCHAR_MAX];
1122 if (v == NULL)
1123 v = PyString_FromStringAndSize(&pchar, 1);
1124 else {
Christian Heimes44720832008-05-26 13:01:01 +00001125#ifdef COUNT_ALLOCS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001126 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001127#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001128 Py_INCREF(v);
1129 }
1130 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001131}
1132
1133static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001134string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001135{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001136 int c;
1137 Py_ssize_t len_a, len_b;
1138 Py_ssize_t min_len;
1139 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001140
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001141 /* Make sure both arguments are strings. */
1142 if (!(PyString_Check(a) && PyString_Check(b))) {
1143 result = Py_NotImplemented;
1144 goto out;
1145 }
1146 if (a == b) {
1147 switch (op) {
1148 case Py_EQ:case Py_LE:case Py_GE:
1149 result = Py_True;
1150 goto out;
1151 case Py_NE:case Py_LT:case Py_GT:
1152 result = Py_False;
1153 goto out;
1154 }
1155 }
1156 if (op == Py_EQ) {
1157 /* Supporting Py_NE here as well does not save
1158 much time, since Py_NE is rarely used. */
1159 if (Py_SIZE(a) == Py_SIZE(b)
1160 && (a->ob_sval[0] == b->ob_sval[0]
1161 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1162 result = Py_True;
1163 } else {
1164 result = Py_False;
1165 }
1166 goto out;
1167 }
1168 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1169 min_len = (len_a < len_b) ? len_a : len_b;
1170 if (min_len > 0) {
1171 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1172 if (c==0)
1173 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1174 } else
1175 c = 0;
1176 if (c == 0)
1177 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1178 switch (op) {
1179 case Py_LT: c = c < 0; break;
1180 case Py_LE: c = c <= 0; break;
1181 case Py_EQ: assert(0); break; /* unreachable */
1182 case Py_NE: c = c != 0; break;
1183 case Py_GT: c = c > 0; break;
1184 case Py_GE: c = c >= 0; break;
1185 default:
1186 result = Py_NotImplemented;
1187 goto out;
1188 }
1189 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001190 out:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001191 Py_INCREF(result);
1192 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001193}
1194
1195int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001196_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001197{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001198 PyStringObject *a = (PyStringObject*) o1;
1199 PyStringObject *b = (PyStringObject*) o2;
1200 return Py_SIZE(a) == Py_SIZE(b)
1201 && *a->ob_sval == *b->ob_sval
1202 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001203}
1204
1205static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001206string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001207{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001208 register Py_ssize_t len;
1209 register unsigned char *p;
1210 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001211
Benjamin Peterson26da9202012-02-21 11:08:50 -05001212 assert(_Py_HashSecret_Initialized);
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001213 if (a->ob_shash != -1)
1214 return a->ob_shash;
1215 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001216 /*
1217 We make the hash of the empty string be 0, rather than using
1218 (prefix ^ suffix), since this slightly obfuscates the hash secret
1219 */
1220 if (len == 0) {
1221 a->ob_shash = 0;
1222 return 0;
1223 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001224 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001225 x = _Py_HashSecret.prefix;
1226 x ^= *p << 7;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001227 while (--len >= 0)
1228 x = (1000003*x) ^ *p++;
1229 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001230 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001231 if (x == -1)
1232 x = -2;
1233 a->ob_shash = x;
1234 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001235}
1236
1237static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001238string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001239{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001240 if (PyIndex_Check(item)) {
1241 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1242 if (i == -1 && PyErr_Occurred())
1243 return NULL;
1244 if (i < 0)
1245 i += PyString_GET_SIZE(self);
1246 return string_item(self, i);
1247 }
1248 else if (PySlice_Check(item)) {
1249 Py_ssize_t start, stop, step, slicelength, cur, i;
1250 char* source_buf;
1251 char* result_buf;
1252 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001253
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001254 if (PySlice_GetIndicesEx((PySliceObject*)item,
1255 PyString_GET_SIZE(self),
1256 &start, &stop, &step, &slicelength) < 0) {
1257 return NULL;
1258 }
Christian Heimes44720832008-05-26 13:01:01 +00001259
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001260 if (slicelength <= 0) {
1261 return PyString_FromStringAndSize("", 0);
1262 }
1263 else if (start == 0 && step == 1 &&
1264 slicelength == PyString_GET_SIZE(self) &&
1265 PyString_CheckExact(self)) {
1266 Py_INCREF(self);
1267 return (PyObject *)self;
1268 }
1269 else if (step == 1) {
1270 return PyString_FromStringAndSize(
1271 PyString_AS_STRING(self) + start,
1272 slicelength);
1273 }
1274 else {
1275 source_buf = PyString_AsString((PyObject*)self);
1276 result_buf = (char *)PyMem_Malloc(slicelength);
1277 if (result_buf == NULL)
1278 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001279
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001280 for (cur = start, i = 0; i < slicelength;
1281 cur += step, i++) {
1282 result_buf[i] = source_buf[cur];
1283 }
Christian Heimes44720832008-05-26 13:01:01 +00001284
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001285 result = PyString_FromStringAndSize(result_buf,
1286 slicelength);
1287 PyMem_Free(result_buf);
1288 return result;
1289 }
1290 }
1291 else {
1292 PyErr_Format(PyExc_TypeError,
1293 "string indices must be integers, not %.200s",
1294 Py_TYPE(item)->tp_name);
1295 return NULL;
1296 }
Christian Heimes44720832008-05-26 13:01:01 +00001297}
1298
1299static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001300string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001301{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001302 if ( index != 0 ) {
1303 PyErr_SetString(PyExc_SystemError,
1304 "accessing non-existent string segment");
1305 return -1;
1306 }
1307 *ptr = (void *)self->ob_sval;
1308 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001309}
1310
1311static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001312string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001313{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001314 PyErr_SetString(PyExc_TypeError,
1315 "Cannot use string as modifiable buffer");
1316 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001317}
1318
1319static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001320string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001321{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001322 if ( lenp )
1323 *lenp = Py_SIZE(self);
1324 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001325}
1326
1327static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001328string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001329{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001330 if ( index != 0 ) {
1331 PyErr_SetString(PyExc_SystemError,
1332 "accessing non-existent string segment");
1333 return -1;
1334 }
1335 *ptr = self->ob_sval;
1336 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001337}
1338
1339static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001340string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001341{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001342 return PyBuffer_FillInfo(view, (PyObject*)self,
1343 (void *)self->ob_sval, Py_SIZE(self),
1344 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001345}
1346
1347static PySequenceMethods string_as_sequence = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001348 (lenfunc)string_length, /*sq_length*/
1349 (binaryfunc)string_concat, /*sq_concat*/
1350 (ssizeargfunc)string_repeat, /*sq_repeat*/
1351 (ssizeargfunc)string_item, /*sq_item*/
1352 (ssizessizeargfunc)string_slice, /*sq_slice*/
1353 0, /*sq_ass_item*/
1354 0, /*sq_ass_slice*/
1355 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001356};
1357
1358static PyMappingMethods string_as_mapping = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001359 (lenfunc)string_length,
1360 (binaryfunc)string_subscript,
1361 0,
Christian Heimes44720832008-05-26 13:01:01 +00001362};
1363
1364static PyBufferProcs string_as_buffer = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001365 (readbufferproc)string_buffer_getreadbuf,
1366 (writebufferproc)string_buffer_getwritebuf,
1367 (segcountproc)string_buffer_getsegcount,
1368 (charbufferproc)string_buffer_getcharbuf,
1369 (getbufferproc)string_buffer_getbuffer,
1370 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001371};
1372
1373
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001374
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip kind i: the format string past its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)]+3)


/* True when `length` bytes of `target` starting at `offset` equal
   `pattern`.  The first and last bytes are compared before memcmp.
   Don't call if length < 2 */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&          \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )


/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Append the substring data[left:right] to `list`.  Expects `str`, `list`
   and an `onError` label in the expanding function.  The statement shape
   is kept as-is because call sites rely on it. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Store the substring data[left:right] as element `count` of `list`:
   directly into a preallocated slot while count < MAX_PREALLOC, via
   PyList_Append() afterwards.  Expects `str`, `list`, `count` and an
   `onError` label in the expanding function. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size. */
#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count

/* Advance (or, for the R variants, retreat) index `i` over whitespace /
   non-whitespace bytes of `s`, staying within [0, len). */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001441
1442Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001443split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001444{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001445 const char *s = PyString_AS_STRING(self);
1446 Py_ssize_t i, j, count=0;
1447 PyObject *str;
1448 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001449
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001450 if (list == NULL)
1451 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001452
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001453 i = j = 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001454
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001455 while (maxsplit-- > 0) {
1456 SKIP_SPACE(s, i, len);
1457 if (i==len) break;
1458 j = i; i++;
1459 SKIP_NONSPACE(s, i, len);
1460 if (j == 0 && i == len && PyString_CheckExact(self)) {
1461 /* No whitespace in self, so just use it as list[0] */
1462 Py_INCREF(self);
1463 PyList_SET_ITEM(list, 0, (PyObject *)self);
1464 count++;
1465 break;
1466 }
1467 SPLIT_ADD(s, j, i);
1468 }
Christian Heimes44720832008-05-26 13:01:01 +00001469
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001470 if (i < len) {
1471 /* Only occurs when maxsplit was reached */
1472 /* Skip any remaining whitespace and copy to end of string */
1473 SKIP_SPACE(s, i, len);
1474 if (i != len)
1475 SPLIT_ADD(s, i, len);
1476 }
1477 FIX_PREALLOC_SIZE(list);
1478 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001479 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001480 Py_DECREF(list);
1481 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001482}
1483
Christian Heimes1a6387e2008-03-26 12:49:49 +00001484Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001485split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001486{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001487 const char *s = PyString_AS_STRING(self);
1488 register Py_ssize_t i, j, count=0;
1489 PyObject *str;
1490 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001491
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001492 if (list == NULL)
1493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001494
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001495 i = j = 0;
1496 while ((j < len) && (maxcount-- > 0)) {
1497 for(; j<len; j++) {
1498 /* I found that using memchr makes no difference */
1499 if (s[j] == ch) {
1500 SPLIT_ADD(s, i, j);
1501 i = j = j + 1;
1502 break;
1503 }
1504 }
1505 }
1506 if (i == 0 && count == 0 && PyString_CheckExact(self)) {
1507 /* ch not in self, so just use self as list[0] */
1508 Py_INCREF(self);
1509 PyList_SET_ITEM(list, 0, (PyObject *)self);
1510 count++;
1511 }
1512 else if (i <= len) {
1513 SPLIT_ADD(s, i, len);
1514 }
1515 FIX_PREALLOC_SIZE(list);
1516 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001517
1518 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001519 Py_DECREF(list);
1520 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001521}
1522
1523PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001524"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001525\n\
Christian Heimes44720832008-05-26 13:01:01 +00001526Return a list of the words in the string S, using sep as the\n\
1527delimiter string. If maxsplit is given, at most maxsplit\n\
1528splits are done. If sep is not specified or is None, any\n\
1529whitespace string is a separator and empty strings are removed\n\
1530from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001531
1532static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001533string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001534{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001535 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1536 Py_ssize_t maxsplit = -1, count=0;
1537 const char *s = PyString_AS_STRING(self), *sub;
1538 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001539#ifdef USE_FAST
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001540 Py_ssize_t pos;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001541#endif
1542
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001543 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1544 return NULL;
1545 if (maxsplit < 0)
1546 maxsplit = PY_SSIZE_T_MAX;
1547 if (subobj == Py_None)
1548 return split_whitespace(self, len, maxsplit);
1549 if (PyString_Check(subobj)) {
1550 sub = PyString_AS_STRING(subobj);
1551 n = PyString_GET_SIZE(subobj);
1552 }
Christian Heimes44720832008-05-26 13:01:01 +00001553#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001554 else if (PyUnicode_Check(subobj))
1555 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001556#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001557 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1558 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001559
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001560 if (n == 0) {
1561 PyErr_SetString(PyExc_ValueError, "empty separator");
1562 return NULL;
1563 }
1564 else if (n == 1)
1565 return split_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001566
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001567 list = PyList_New(PREALLOC_SIZE(maxsplit));
1568 if (list == NULL)
1569 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001570
1571#ifdef USE_FAST
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001572 i = j = 0;
1573 while (maxsplit-- > 0) {
1574 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1575 if (pos < 0)
1576 break;
1577 j = i+pos;
1578 SPLIT_ADD(s, i, j);
1579 i = j + n;
1580 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001581#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001582 i = j = 0;
1583 while ((j+n <= len) && (maxsplit-- > 0)) {
1584 for (; j+n <= len; j++) {
1585 if (Py_STRING_MATCH(s, j, sub, n)) {
1586 SPLIT_ADD(s, i, j);
1587 i = j = j + n;
1588 break;
1589 }
1590 }
1591 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001592#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001593 SPLIT_ADD(s, i, len);
1594 FIX_PREALLOC_SIZE(list);
1595 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001596
Christian Heimes44720832008-05-26 13:01:01 +00001597 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001598 Py_DECREF(list);
1599 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001600}
1601
1602PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001603"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001604\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001605Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001606the separator itself, and the part after it. If the separator is not\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001607found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001608
1609static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001610string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001611{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001612 const char *sep;
1613 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001614
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001615 if (PyString_Check(sep_obj)) {
1616 sep = PyString_AS_STRING(sep_obj);
1617 sep_len = PyString_GET_SIZE(sep_obj);
1618 }
Christian Heimes44720832008-05-26 13:01:01 +00001619#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001620 else if (PyUnicode_Check(sep_obj))
1621 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001622#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001623 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1624 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001625
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001626 return stringlib_partition(
1627 (PyObject*) self,
1628 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1629 sep_obj, sep, sep_len
1630 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001631}
1632
1633PyDoc_STRVAR(rpartition__doc__,
Ezio Melottidabb5f72010-01-25 11:46:11 +00001634"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001635\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001636Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001637the part before it, the separator itself, and the part after it. If the\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00001638separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001639
1640static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001641string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001642{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001643 const char *sep;
1644 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001645
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001646 if (PyString_Check(sep_obj)) {
1647 sep = PyString_AS_STRING(sep_obj);
1648 sep_len = PyString_GET_SIZE(sep_obj);
1649 }
Christian Heimes44720832008-05-26 13:01:01 +00001650#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001651 else if (PyUnicode_Check(sep_obj))
1652 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001653#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001654 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1655 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001656
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001657 return stringlib_rpartition(
1658 (PyObject*) self,
1659 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1660 sep_obj, sep, sep_len
1661 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001662}
1663
1664Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001665rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001666{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001667 const char *s = PyString_AS_STRING(self);
1668 Py_ssize_t i, j, count=0;
1669 PyObject *str;
1670 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001671
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001672 if (list == NULL)
1673 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001674
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001675 i = j = len-1;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001676
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001677 while (maxsplit-- > 0) {
1678 RSKIP_SPACE(s, i);
1679 if (i<0) break;
1680 j = i; i--;
1681 RSKIP_NONSPACE(s, i);
1682 if (j == len-1 && i < 0 && PyString_CheckExact(self)) {
1683 /* No whitespace in self, so just use it as list[0] */
1684 Py_INCREF(self);
1685 PyList_SET_ITEM(list, 0, (PyObject *)self);
1686 count++;
1687 break;
1688 }
1689 SPLIT_ADD(s, i + 1, j + 1);
1690 }
1691 if (i >= 0) {
1692 /* Only occurs when maxsplit was reached */
1693 /* Skip any remaining whitespace and copy to beginning of string */
1694 RSKIP_SPACE(s, i);
1695 if (i >= 0)
1696 SPLIT_ADD(s, 0, i + 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001697
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001698 }
1699 FIX_PREALLOC_SIZE(list);
1700 if (PyList_Reverse(list) < 0)
1701 goto onError;
1702 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001703 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001704 Py_DECREF(list);
1705 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001706}
1707
1708Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001709rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001710{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001711 const char *s = PyString_AS_STRING(self);
1712 register Py_ssize_t i, j, count=0;
1713 PyObject *str;
1714 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
Christian Heimes1a6387e2008-03-26 12:49:49 +00001715
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001716 if (list == NULL)
1717 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001718
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001719 i = j = len - 1;
1720 while ((i >= 0) && (maxcount-- > 0)) {
1721 for (; i >= 0; i--) {
1722 if (s[i] == ch) {
1723 SPLIT_ADD(s, i + 1, j + 1);
1724 j = i = i - 1;
1725 break;
1726 }
1727 }
1728 }
1729 if (i < 0 && count == 0 && PyString_CheckExact(self)) {
1730 /* ch not in self, so just use self as list[0] */
1731 Py_INCREF(self);
1732 PyList_SET_ITEM(list, 0, (PyObject *)self);
1733 count++;
1734 }
1735 else if (j >= -1) {
1736 SPLIT_ADD(s, 0, j + 1);
1737 }
1738 FIX_PREALLOC_SIZE(list);
1739 if (PyList_Reverse(list) < 0)
1740 goto onError;
1741 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001742
Christian Heimes44720832008-05-26 13:01:01 +00001743 onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001744 Py_DECREF(list);
1745 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001746}
1747
1748PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001749"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001750\n\
Christian Heimes44720832008-05-26 13:01:01 +00001751Return a list of the words in the string S, using sep as the\n\
1752delimiter string, starting at the end of the string and working\n\
1753to the front. If maxsplit is given, at most maxsplit splits are\n\
1754done. If sep is not specified or is None, any whitespace string\n\
1755is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001756
1757static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001758string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001759{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001760 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1761 Py_ssize_t maxsplit = -1, count=0;
1762 const char *s, *sub;
1763 PyObject *list, *str, *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001764
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001765 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1766 return NULL;
1767 if (maxsplit < 0)
1768 maxsplit = PY_SSIZE_T_MAX;
1769 if (subobj == Py_None)
1770 return rsplit_whitespace(self, len, maxsplit);
1771 if (PyString_Check(subobj)) {
1772 sub = PyString_AS_STRING(subobj);
1773 n = PyString_GET_SIZE(subobj);
1774 }
Christian Heimes44720832008-05-26 13:01:01 +00001775#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001776 else if (PyUnicode_Check(subobj))
1777 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001778#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001779 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1780 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001781
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001782 if (n == 0) {
1783 PyErr_SetString(PyExc_ValueError, "empty separator");
1784 return NULL;
1785 }
1786 else if (n == 1)
1787 return rsplit_char(self, len, sub[0], maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001788
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001789 list = PyList_New(PREALLOC_SIZE(maxsplit));
1790 if (list == NULL)
1791 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001792
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001793 j = len;
1794 i = j - n;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001795
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001796 s = PyString_AS_STRING(self);
1797 while ( (i >= 0) && (maxsplit-- > 0) ) {
1798 for (; i>=0; i--) {
1799 if (Py_STRING_MATCH(s, i, sub, n)) {
1800 SPLIT_ADD(s, i + n, j);
1801 j = i;
1802 i -= n;
1803 break;
1804 }
1805 }
1806 }
1807 SPLIT_ADD(s, 0, j);
1808 FIX_PREALLOC_SIZE(list);
1809 if (PyList_Reverse(list) < 0)
1810 goto onError;
1811 return list;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001812
1813onError:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001814 Py_DECREF(list);
1815 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001816}
1817
1818
1819PyDoc_STRVAR(join__doc__,
Georg Brandl5d2eb342009-10-27 15:08:27 +00001820"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001821\n\
1822Return a string which is the concatenation of the strings in the\n\
Georg Brandl5d2eb342009-10-27 15:08:27 +00001823iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001824
1825static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001826string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001827{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001828 char *sep = PyString_AS_STRING(self);
1829 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1830 PyObject *res = NULL;
1831 char *p;
1832 Py_ssize_t seqlen = 0;
1833 size_t sz = 0;
1834 Py_ssize_t i;
1835 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001836
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001837 seq = PySequence_Fast(orig, "");
1838 if (seq == NULL) {
1839 return NULL;
1840 }
Christian Heimes44720832008-05-26 13:01:01 +00001841
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001842 seqlen = PySequence_Size(seq);
1843 if (seqlen == 0) {
1844 Py_DECREF(seq);
1845 return PyString_FromString("");
1846 }
1847 if (seqlen == 1) {
1848 item = PySequence_Fast_GET_ITEM(seq, 0);
1849 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1850 Py_INCREF(item);
1851 Py_DECREF(seq);
1852 return item;
1853 }
1854 }
Christian Heimes44720832008-05-26 13:01:01 +00001855
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001856 /* There are at least two things to join, or else we have a subclass
1857 * of the builtin types in the sequence.
1858 * Do a pre-pass to figure out the total amount of space we'll
1859 * need (sz), see whether any argument is absurd, and defer to
1860 * the Unicode join if appropriate.
1861 */
1862 for (i = 0; i < seqlen; i++) {
1863 const size_t old_sz = sz;
1864 item = PySequence_Fast_GET_ITEM(seq, i);
1865 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001866#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001867 if (PyUnicode_Check(item)) {
1868 /* Defer to Unicode join.
1869 * CAUTION: There's no gurantee that the
1870 * original sequence can be iterated over
1871 * again, so we must pass seq here.
1872 */
1873 PyObject *result;
1874 result = PyUnicode_Join((PyObject *)self, seq);
1875 Py_DECREF(seq);
1876 return result;
1877 }
Christian Heimes44720832008-05-26 13:01:01 +00001878#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001879 PyErr_Format(PyExc_TypeError,
1880 "sequence item %zd: expected string,"
1881 " %.80s found",
1882 i, Py_TYPE(item)->tp_name);
1883 Py_DECREF(seq);
1884 return NULL;
1885 }
1886 sz += PyString_GET_SIZE(item);
1887 if (i != 0)
1888 sz += seplen;
1889 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1890 PyErr_SetString(PyExc_OverflowError,
1891 "join() result is too long for a Python string");
1892 Py_DECREF(seq);
1893 return NULL;
1894 }
1895 }
Christian Heimes44720832008-05-26 13:01:01 +00001896
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001897 /* Allocate result space. */
1898 res = PyString_FromStringAndSize((char*)NULL, sz);
1899 if (res == NULL) {
1900 Py_DECREF(seq);
1901 return NULL;
1902 }
Christian Heimes44720832008-05-26 13:01:01 +00001903
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001904 /* Catenate everything. */
1905 p = PyString_AS_STRING(res);
1906 for (i = 0; i < seqlen; ++i) {
1907 size_t n;
1908 item = PySequence_Fast_GET_ITEM(seq, i);
1909 n = PyString_GET_SIZE(item);
1910 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1911 p += n;
1912 if (i < seqlen - 1) {
1913 Py_MEMCPY(p, sep, seplen);
1914 p += seplen;
1915 }
1916 }
Christian Heimes44720832008-05-26 13:01:01 +00001917
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001918 Py_DECREF(seq);
1919 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001920}
1921
1922PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001923_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001924{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001925 assert(sep != NULL && PyString_Check(sep));
1926 assert(x != NULL);
1927 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001928}
1929
1930Py_LOCAL_INLINE(void)
1931string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1932{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001933 if (*end > len)
1934 *end = len;
1935 else if (*end < 0)
1936 *end += len;
1937 if (*end < 0)
1938 *end = 0;
1939 if (*start < 0)
1940 *start += len;
1941 if (*start < 0)
1942 *start = 0;
Christian Heimes44720832008-05-26 13:01:01 +00001943}
1944
1945Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001946string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001947{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001948 PyObject *subobj;
1949 const char *sub;
1950 Py_ssize_t sub_len;
1951 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1952 PyObject *obj_start=Py_None, *obj_end=Py_None;
Christian Heimes44720832008-05-26 13:01:01 +00001953
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001954 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj,
1955 &obj_start, &obj_end))
1956 return -2;
1957 /* To support None in "start" and "end" arguments, meaning
1958 the same as if they were not passed.
1959 */
1960 if (obj_start != Py_None)
1961 if (!_PyEval_SliceIndex(obj_start, &start))
1962 return -2;
1963 if (obj_end != Py_None)
1964 if (!_PyEval_SliceIndex(obj_end, &end))
1965 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001966
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001967 if (PyString_Check(subobj)) {
1968 sub = PyString_AS_STRING(subobj);
1969 sub_len = PyString_GET_SIZE(subobj);
1970 }
Christian Heimes44720832008-05-26 13:01:01 +00001971#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001972 else if (PyUnicode_Check(subobj))
1973 return PyUnicode_Find(
1974 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001975#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001976 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1977 /* XXX - the "expected a character buffer object" is pretty
1978 confusing for a non-expert. remap to something else ? */
1979 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001980
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001981 if (dir > 0)
1982 return stringlib_find_slice(
1983 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1984 sub, sub_len, start, end);
1985 else
1986 return stringlib_rfind_slice(
1987 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1988 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001989}
1990
1991
1992PyDoc_STRVAR(find__doc__,
1993"S.find(sub [,start [,end]]) -> int\n\
1994\n\
1995Return the lowest index in S where substring sub is found,\n\
1996such that sub is contained within s[start:end]. Optional\n\
1997arguments start and end are interpreted as in slice notation.\n\
1998\n\
1999Return -1 on failure.");
2000
2001static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002002string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002003{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002004 Py_ssize_t result = string_find_internal(self, args, +1);
2005 if (result == -2)
2006 return NULL;
2007 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002008}
2009
2010
2011PyDoc_STRVAR(index__doc__,
2012"S.index(sub [,start [,end]]) -> int\n\
2013\n\
2014Like S.find() but raise ValueError when the substring is not found.");
2015
2016static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002017string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002018{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002019 Py_ssize_t result = string_find_internal(self, args, +1);
2020 if (result == -2)
2021 return NULL;
2022 if (result == -1) {
2023 PyErr_SetString(PyExc_ValueError,
2024 "substring not found");
2025 return NULL;
2026 }
2027 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002028}
2029
2030
2031PyDoc_STRVAR(rfind__doc__,
2032"S.rfind(sub [,start [,end]]) -> int\n\
2033\n\
2034Return the highest index in S where substring sub is found,\n\
2035such that sub is contained within s[start:end]. Optional\n\
2036arguments start and end are interpreted as in slice notation.\n\
2037\n\
2038Return -1 on failure.");
2039
2040static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002041string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002042{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002043 Py_ssize_t result = string_find_internal(self, args, -1);
2044 if (result == -2)
2045 return NULL;
2046 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002047}
2048
2049
2050PyDoc_STRVAR(rindex__doc__,
2051"S.rindex(sub [,start [,end]]) -> int\n\
2052\n\
2053Like S.rfind() but raise ValueError when the substring is not found.");
2054
2055static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002056string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002057{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002058 Py_ssize_t result = string_find_internal(self, args, -1);
2059 if (result == -2)
2060 return NULL;
2061 if (result == -1) {
2062 PyErr_SetString(PyExc_ValueError,
2063 "substring not found");
2064 return NULL;
2065 }
2066 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00002067}
2068
2069
2070Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002071do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00002072{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002073 char *s = PyString_AS_STRING(self);
2074 Py_ssize_t len = PyString_GET_SIZE(self);
2075 char *sep = PyString_AS_STRING(sepobj);
2076 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
2077 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002078
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002079 i = 0;
2080 if (striptype != RIGHTSTRIP) {
2081 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
2082 i++;
2083 }
2084 }
Christian Heimes44720832008-05-26 13:01:01 +00002085
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002086 j = len;
2087 if (striptype != LEFTSTRIP) {
2088 do {
2089 j--;
2090 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
2091 j++;
2092 }
Christian Heimes44720832008-05-26 13:01:01 +00002093
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002094 if (i == 0 && j == len && PyString_CheckExact(self)) {
2095 Py_INCREF(self);
2096 return (PyObject*)self;
2097 }
2098 else
2099 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002100}
2101
2102
2103Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002104do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00002105{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002106 char *s = PyString_AS_STRING(self);
2107 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00002108
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002109 i = 0;
2110 if (striptype != RIGHTSTRIP) {
2111 while (i < len && isspace(Py_CHARMASK(s[i]))) {
2112 i++;
2113 }
2114 }
Christian Heimes44720832008-05-26 13:01:01 +00002115
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002116 j = len;
2117 if (striptype != LEFTSTRIP) {
2118 do {
2119 j--;
2120 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2121 j++;
2122 }
Christian Heimes44720832008-05-26 13:01:01 +00002123
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002124 if (i == 0 && j == len && PyString_CheckExact(self)) {
2125 Py_INCREF(self);
2126 return (PyObject*)self;
2127 }
2128 else
2129 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00002130}
2131
2132
2133Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002134do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002135{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002136 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002137
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002138 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2139 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002140
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002141 if (sep != NULL && sep != Py_None) {
2142 if (PyString_Check(sep))
2143 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00002144#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002145 else if (PyUnicode_Check(sep)) {
2146 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2147 PyObject *res;
2148 if (uniself==NULL)
2149 return NULL;
2150 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2151 striptype, sep);
2152 Py_DECREF(uniself);
2153 return res;
2154 }
Christian Heimes44720832008-05-26 13:01:01 +00002155#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002156 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00002157#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002158 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00002159#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002160 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00002161#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002162 STRIPNAME(striptype));
2163 return NULL;
2164 }
Christian Heimes44720832008-05-26 13:01:01 +00002165
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002166 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00002167}
2168
2169
2170PyDoc_STRVAR(strip__doc__,
2171"S.strip([chars]) -> string or unicode\n\
2172\n\
2173Return a copy of the string S with leading and trailing\n\
2174whitespace removed.\n\
2175If chars is given and not None, remove characters in chars instead.\n\
2176If chars is unicode, S will be converted to unicode before stripping");
2177
2178static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002179string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002180{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002181 if (PyTuple_GET_SIZE(args) == 0)
2182 return do_strip(self, BOTHSTRIP); /* Common case */
2183 else
2184 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002185}
2186
2187
2188PyDoc_STRVAR(lstrip__doc__,
2189"S.lstrip([chars]) -> string or unicode\n\
2190\n\
2191Return a copy of the string S with leading whitespace removed.\n\
2192If chars is given and not None, remove characters in chars instead.\n\
2193If chars is unicode, S will be converted to unicode before stripping");
2194
2195static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002196string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002197{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002198 if (PyTuple_GET_SIZE(args) == 0)
2199 return do_strip(self, LEFTSTRIP); /* Common case */
2200 else
2201 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002202}
2203
2204
2205PyDoc_STRVAR(rstrip__doc__,
2206"S.rstrip([chars]) -> string or unicode\n\
2207\n\
2208Return a copy of the string S with trailing whitespace removed.\n\
2209If chars is given and not None, remove characters in chars instead.\n\
2210If chars is unicode, S will be converted to unicode before stripping");
2211
2212static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002213string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002214{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002215 if (PyTuple_GET_SIZE(args) == 0)
2216 return do_strip(self, RIGHTSTRIP); /* Common case */
2217 else
2218 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00002219}
2220
2221
2222PyDoc_STRVAR(lower__doc__,
2223"S.lower() -> string\n\
2224\n\
2225Return a copy of the string S converted to lowercase.");
2226
2227/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2228#ifndef _tolower
2229#define _tolower tolower
2230#endif
2231
2232static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002233string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002234{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002235 char *s;
2236 Py_ssize_t i, n = PyString_GET_SIZE(self);
2237 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002238
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002239 newobj = PyString_FromStringAndSize(NULL, n);
2240 if (!newobj)
2241 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002242
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002243 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002244
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002245 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002246
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002247 for (i = 0; i < n; i++) {
2248 int c = Py_CHARMASK(s[i]);
2249 if (isupper(c))
2250 s[i] = _tolower(c);
2251 }
Christian Heimes44720832008-05-26 13:01:01 +00002252
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002253 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002254}
2255
2256PyDoc_STRVAR(upper__doc__,
2257"S.upper() -> string\n\
2258\n\
2259Return a copy of the string S converted to uppercase.");
2260
2261#ifndef _toupper
2262#define _toupper toupper
2263#endif
2264
2265static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002266string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002267{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002268 char *s;
2269 Py_ssize_t i, n = PyString_GET_SIZE(self);
2270 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002271
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002272 newobj = PyString_FromStringAndSize(NULL, n);
2273 if (!newobj)
2274 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002275
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002276 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002277
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002278 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002279
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002280 for (i = 0; i < n; i++) {
2281 int c = Py_CHARMASK(s[i]);
2282 if (islower(c))
2283 s[i] = _toupper(c);
2284 }
Christian Heimes44720832008-05-26 13:01:01 +00002285
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002286 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002287}
2288
2289PyDoc_STRVAR(title__doc__,
2290"S.title() -> string\n\
2291\n\
2292Return a titlecased version of S, i.e. words start with uppercase\n\
2293characters, all remaining cased characters have lowercase.");
2294
2295static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002296string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002297{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002298 char *s = PyString_AS_STRING(self), *s_new;
2299 Py_ssize_t i, n = PyString_GET_SIZE(self);
2300 int previous_is_cased = 0;
2301 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002302
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002303 newobj = PyString_FromStringAndSize(NULL, n);
2304 if (newobj == NULL)
2305 return NULL;
2306 s_new = PyString_AsString(newobj);
2307 for (i = 0; i < n; i++) {
2308 int c = Py_CHARMASK(*s++);
2309 if (islower(c)) {
2310 if (!previous_is_cased)
2311 c = toupper(c);
2312 previous_is_cased = 1;
2313 } else if (isupper(c)) {
2314 if (previous_is_cased)
2315 c = tolower(c);
2316 previous_is_cased = 1;
2317 } else
2318 previous_is_cased = 0;
2319 *s_new++ = c;
2320 }
2321 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002322}
2323
2324PyDoc_STRVAR(capitalize__doc__,
2325"S.capitalize() -> string\n\
2326\n\
2327Return a copy of the string S with only its first character\n\
2328capitalized.");
2329
2330static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002331string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002332{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002333 char *s = PyString_AS_STRING(self), *s_new;
2334 Py_ssize_t i, n = PyString_GET_SIZE(self);
2335 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002336
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002337 newobj = PyString_FromStringAndSize(NULL, n);
2338 if (newobj == NULL)
2339 return NULL;
2340 s_new = PyString_AsString(newobj);
2341 if (0 < n) {
2342 int c = Py_CHARMASK(*s++);
2343 if (islower(c))
2344 *s_new = toupper(c);
2345 else
2346 *s_new = c;
2347 s_new++;
2348 }
2349 for (i = 1; i < n; i++) {
2350 int c = Py_CHARMASK(*s++);
2351 if (isupper(c))
2352 *s_new = tolower(c);
2353 else
2354 *s_new = c;
2355 s_new++;
2356 }
2357 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002358}
2359
2360
2361PyDoc_STRVAR(count__doc__,
2362"S.count(sub[, start[, end]]) -> int\n\
2363\n\
2364Return the number of non-overlapping occurrences of substring sub in\n\
2365string S[start:end]. Optional arguments start and end are interpreted\n\
2366as in slice notation.");
2367
2368static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002369string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002370{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002371 PyObject *sub_obj;
2372 const char *str = PyString_AS_STRING(self), *sub;
2373 Py_ssize_t sub_len;
2374 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002375
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002376 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2377 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2378 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002379
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002380 if (PyString_Check(sub_obj)) {
2381 sub = PyString_AS_STRING(sub_obj);
2382 sub_len = PyString_GET_SIZE(sub_obj);
2383 }
Christian Heimes44720832008-05-26 13:01:01 +00002384#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002385 else if (PyUnicode_Check(sub_obj)) {
2386 Py_ssize_t count;
2387 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2388 if (count == -1)
2389 return NULL;
2390 else
2391 return PyInt_FromSsize_t(count);
2392 }
Christian Heimes44720832008-05-26 13:01:01 +00002393#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002394 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2395 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002396
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002397 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002398
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002399 return PyInt_FromSsize_t(
2400 stringlib_count(str + start, end - start, sub, sub_len)
2401 );
Christian Heimes44720832008-05-26 13:01:01 +00002402}
2403
2404PyDoc_STRVAR(swapcase__doc__,
2405"S.swapcase() -> string\n\
2406\n\
2407Return a copy of the string S with uppercase characters\n\
2408converted to lowercase and vice versa.");
2409
2410static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002411string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002412{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002413 char *s = PyString_AS_STRING(self), *s_new;
2414 Py_ssize_t i, n = PyString_GET_SIZE(self);
2415 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002416
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002417 newobj = PyString_FromStringAndSize(NULL, n);
2418 if (newobj == NULL)
2419 return NULL;
2420 s_new = PyString_AsString(newobj);
2421 for (i = 0; i < n; i++) {
2422 int c = Py_CHARMASK(*s++);
2423 if (islower(c)) {
2424 *s_new = toupper(c);
2425 }
2426 else if (isupper(c)) {
2427 *s_new = tolower(c);
2428 }
2429 else
2430 *s_new = c;
2431 s_new++;
2432 }
2433 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002434}
2435
2436
2437PyDoc_STRVAR(translate__doc__,
2438"S.translate(table [,deletechars]) -> string\n\
2439\n\
2440Return a copy of the string S, where all characters occurring\n\
2441in the optional argument deletechars are removed, and the\n\
2442remaining characters have been mapped through the given\n\
2443translation table, which must be a string of length 256.");
2444
2445static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002446string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002447{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002448 register char *input, *output;
2449 const char *table;
2450 register Py_ssize_t i, c, changed = 0;
2451 PyObject *input_obj = (PyObject*)self;
2452 const char *output_start, *del_table=NULL;
2453 Py_ssize_t inlen, tablen, dellen = 0;
2454 PyObject *result;
2455 int trans_table[256];
2456 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002457
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002458 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2459 &tableobj, &delobj))
2460 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002461
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002462 if (PyString_Check(tableobj)) {
2463 table = PyString_AS_STRING(tableobj);
2464 tablen = PyString_GET_SIZE(tableobj);
2465 }
2466 else if (tableobj == Py_None) {
2467 table = NULL;
2468 tablen = 256;
2469 }
Christian Heimes44720832008-05-26 13:01:01 +00002470#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002471 else if (PyUnicode_Check(tableobj)) {
2472 /* Unicode .translate() does not support the deletechars
2473 parameter; instead a mapping to None will cause characters
2474 to be deleted. */
2475 if (delobj != NULL) {
2476 PyErr_SetString(PyExc_TypeError,
2477 "deletions are implemented differently for unicode");
2478 return NULL;
2479 }
2480 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2481 }
Christian Heimes44720832008-05-26 13:01:01 +00002482#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002483 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2484 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002485
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002486 if (tablen != 256) {
2487 PyErr_SetString(PyExc_ValueError,
2488 "translation table must be 256 characters long");
2489 return NULL;
2490 }
Christian Heimes44720832008-05-26 13:01:01 +00002491
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002492 if (delobj != NULL) {
2493 if (PyString_Check(delobj)) {
2494 del_table = PyString_AS_STRING(delobj);
2495 dellen = PyString_GET_SIZE(delobj);
2496 }
Christian Heimes44720832008-05-26 13:01:01 +00002497#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002498 else if (PyUnicode_Check(delobj)) {
2499 PyErr_SetString(PyExc_TypeError,
2500 "deletions are implemented differently for unicode");
2501 return NULL;
2502 }
Christian Heimes44720832008-05-26 13:01:01 +00002503#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002504 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2505 return NULL;
2506 }
2507 else {
2508 del_table = NULL;
2509 dellen = 0;
2510 }
Christian Heimes44720832008-05-26 13:01:01 +00002511
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002512 inlen = PyString_GET_SIZE(input_obj);
2513 result = PyString_FromStringAndSize((char *)NULL, inlen);
2514 if (result == NULL)
2515 return NULL;
2516 output_start = output = PyString_AsString(result);
2517 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002518
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002519 if (dellen == 0 && table != NULL) {
2520 /* If no deletions are required, use faster code */
2521 for (i = inlen; --i >= 0; ) {
2522 c = Py_CHARMASK(*input++);
2523 if (Py_CHARMASK((*output++ = table[c])) != c)
2524 changed = 1;
2525 }
2526 if (changed || !PyString_CheckExact(input_obj))
2527 return result;
2528 Py_DECREF(result);
2529 Py_INCREF(input_obj);
2530 return input_obj;
2531 }
Christian Heimes44720832008-05-26 13:01:01 +00002532
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002533 if (table == NULL) {
2534 for (i = 0; i < 256; i++)
2535 trans_table[i] = Py_CHARMASK(i);
2536 } else {
2537 for (i = 0; i < 256; i++)
2538 trans_table[i] = Py_CHARMASK(table[i]);
2539 }
Christian Heimes44720832008-05-26 13:01:01 +00002540
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002541 for (i = 0; i < dellen; i++)
2542 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002543
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002544 for (i = inlen; --i >= 0; ) {
2545 c = Py_CHARMASK(*input++);
2546 if (trans_table[c] != -1)
2547 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2548 continue;
2549 changed = 1;
2550 }
2551 if (!changed && PyString_CheckExact(input_obj)) {
2552 Py_DECREF(result);
2553 Py_INCREF(input_obj);
2554 return input_obj;
2555 }
2556 /* Fix the size of the resulting string */
2557 if (inlen > 0)
2558 _PyString_Resize(&result, output - output_start);
2559 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002560}
2561
2562
/* Search directions passed as the `direction` argument of the
   substring helpers.  REVERSE is parenthesized so that it stays a
   single operand wherever it is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Return a pointer to the first byte equal to `c` within the first
   `target_len` bytes of `target`, or NULL when there is none.  Every
   macro argument is parenthesized so that arbitrary expressions can be
   passed safely. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2570
2571/* String ops must return a string. */
2572/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002573Py_LOCAL(PyStringObject *)
2574return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002575{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002576 if (PyString_CheckExact(self)) {
2577 Py_INCREF(self);
2578 return self;
2579 }
2580 return (PyStringObject *)PyString_FromStringAndSize(
2581 PyString_AS_STRING(self),
2582 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002583}
2584
2585Py_LOCAL_INLINE(Py_ssize_t)
2586countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2587{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002588 Py_ssize_t count=0;
2589 const char *start=target;
2590 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002591
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002592 while ( (start=findchar(start, end-start, c)) != NULL ) {
2593 count++;
2594 if (count >= maxcount)
2595 break;
2596 start += 1;
2597 }
2598 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002599}
2600
2601Py_LOCAL(Py_ssize_t)
2602findstring(const char *target, Py_ssize_t target_len,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002603 const char *pattern, Py_ssize_t pattern_len,
2604 Py_ssize_t start,
2605 Py_ssize_t end,
2606 int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002607{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002608 if (start < 0) {
2609 start += target_len;
2610 if (start < 0)
2611 start = 0;
2612 }
2613 if (end > target_len) {
2614 end = target_len;
2615 } else if (end < 0) {
2616 end += target_len;
2617 if (end < 0)
2618 end = 0;
2619 }
Christian Heimes44720832008-05-26 13:01:01 +00002620
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002621 /* zero-length substrings always match at the first attempt */
2622 if (pattern_len == 0)
2623 return (direction > 0) ? start : end;
Christian Heimes44720832008-05-26 13:01:01 +00002624
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002625 end -= pattern_len;
Christian Heimes44720832008-05-26 13:01:01 +00002626
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002627 if (direction < 0) {
2628 for (; end >= start; end--)
2629 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2630 return end;
2631 } else {
2632 for (; start <= end; start++)
2633 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2634 return start;
2635 }
2636 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00002637}
2638
2639Py_LOCAL_INLINE(Py_ssize_t)
2640countstring(const char *target, Py_ssize_t target_len,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002641 const char *pattern, Py_ssize_t pattern_len,
2642 Py_ssize_t start,
2643 Py_ssize_t end,
2644 int direction, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002645{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002646 Py_ssize_t count=0;
Christian Heimes44720832008-05-26 13:01:01 +00002647
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002648 if (start < 0) {
2649 start += target_len;
2650 if (start < 0)
2651 start = 0;
2652 }
2653 if (end > target_len) {
2654 end = target_len;
2655 } else if (end < 0) {
2656 end += target_len;
2657 if (end < 0)
2658 end = 0;
2659 }
Christian Heimes44720832008-05-26 13:01:01 +00002660
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002661 /* zero-length substrings match everywhere */
2662 if (pattern_len == 0 || maxcount == 0) {
2663 if (target_len+1 < maxcount)
2664 return target_len+1;
2665 return maxcount;
2666 }
Christian Heimes44720832008-05-26 13:01:01 +00002667
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002668 end -= pattern_len;
2669 if (direction < 0) {
2670 for (; (end >= start); end--)
2671 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2672 count++;
2673 if (--maxcount <= 0) break;
2674 end -= pattern_len-1;
2675 }
2676 } else {
2677 for (; (start <= end); start++)
2678 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2679 count++;
2680 if (--maxcount <= 0)
2681 break;
2682 start += pattern_len-1;
2683 }
2684 }
2685 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002686}
2687
2688
2689/* Algorithms for different cases of string replacement */
2690
2691/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002692Py_LOCAL(PyStringObject *)
2693replace_interleave(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002694 const char *to_s, Py_ssize_t to_len,
2695 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002696{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002697 char *self_s, *result_s;
2698 Py_ssize_t self_len, result_len;
2699 Py_ssize_t count, i, product;
2700 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002701
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002702 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002703
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002704 /* 1 at the end plus 1 after every character */
2705 count = self_len+1;
2706 if (maxcount < count)
2707 count = maxcount;
Christian Heimes44720832008-05-26 13:01:01 +00002708
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002709 /* Check for overflow */
2710 /* result_len = count * to_len + self_len; */
2711 product = count * to_len;
2712 if (product / to_len != count) {
2713 PyErr_SetString(PyExc_OverflowError,
2714 "replace string is too long");
2715 return NULL;
2716 }
2717 result_len = product + self_len;
2718 if (result_len < 0) {
2719 PyErr_SetString(PyExc_OverflowError,
2720 "replace string is too long");
2721 return NULL;
2722 }
Christian Heimes44720832008-05-26 13:01:01 +00002723
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002724 if (! (result = (PyStringObject *)
2725 PyString_FromStringAndSize(NULL, result_len)) )
2726 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002727
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002728 self_s = PyString_AS_STRING(self);
2729 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002730
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002731 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002732
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002733 /* Lay the first one down (guaranteed this will occur) */
2734 Py_MEMCPY(result_s, to_s, to_len);
2735 result_s += to_len;
2736 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002737
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002738 for (i=0; i<count; i++) {
2739 *result_s++ = *self_s++;
2740 Py_MEMCPY(result_s, to_s, to_len);
2741 result_s += to_len;
2742 }
2743
2744 /* Copy the rest of the original string */
2745 Py_MEMCPY(result_s, self_s, self_len-i);
2746
2747 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002748}
2749
2750/* Special case for deleting a single character */
2751/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002752Py_LOCAL(PyStringObject *)
2753replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002754 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002755{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002756 char *self_s, *result_s;
2757 char *start, *next, *end;
2758 Py_ssize_t self_len, result_len;
2759 Py_ssize_t count;
2760 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002761
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002762 self_len = PyString_GET_SIZE(self);
2763 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002764
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002765 count = countchar(self_s, self_len, from_c, maxcount);
2766 if (count == 0) {
2767 return return_self(self);
2768 }
Christian Heimes44720832008-05-26 13:01:01 +00002769
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002770 result_len = self_len - count; /* from_len == 1 */
2771 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002772
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002773 if ( (result = (PyStringObject *)
2774 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2775 return NULL;
2776 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002777
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002778 start = self_s;
2779 end = self_s + self_len;
2780 while (count-- > 0) {
2781 next = findchar(start, end-start, from_c);
2782 if (next == NULL)
2783 break;
2784 Py_MEMCPY(result_s, start, next-start);
2785 result_s += (next-start);
2786 start = next+1;
2787 }
2788 Py_MEMCPY(result_s, start, end-start);
2789
2790 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002791}
2792
2793/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2794
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002795Py_LOCAL(PyStringObject *)
2796replace_delete_substring(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002797 const char *from_s, Py_ssize_t from_len,
2798 Py_ssize_t maxcount) {
2799 char *self_s, *result_s;
2800 char *start, *next, *end;
2801 Py_ssize_t self_len, result_len;
2802 Py_ssize_t count, offset;
2803 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002804
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002805 self_len = PyString_GET_SIZE(self);
2806 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002807
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002808 count = countstring(self_s, self_len,
2809 from_s, from_len,
2810 0, self_len, 1,
2811 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002812
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002813 if (count == 0) {
2814 /* no matches */
2815 return return_self(self);
2816 }
Christian Heimes44720832008-05-26 13:01:01 +00002817
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002818 result_len = self_len - (count * from_len);
2819 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002820
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002821 if ( (result = (PyStringObject *)
2822 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2823 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002824
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002825 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002826
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002827 start = self_s;
2828 end = self_s + self_len;
2829 while (count-- > 0) {
2830 offset = findstring(start, end-start,
2831 from_s, from_len,
2832 0, end-start, FORWARD);
2833 if (offset == -1)
2834 break;
2835 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002836
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002837 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002838
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002839 result_s += (next-start);
2840 start = next+from_len;
2841 }
2842 Py_MEMCPY(result_s, start, end-start);
2843 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002844}
2845
2846/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002847Py_LOCAL(PyStringObject *)
2848replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002849 char from_c, char to_c,
2850 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002851{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002852 char *self_s, *result_s, *start, *end, *next;
2853 Py_ssize_t self_len;
2854 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002855
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002856 /* The result string will be the same size */
2857 self_s = PyString_AS_STRING(self);
2858 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002859
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002860 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002861
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002862 if (next == NULL) {
2863 /* No matches; return the original string */
2864 return return_self(self);
2865 }
Christian Heimes44720832008-05-26 13:01:01 +00002866
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002867 /* Need to make a new string */
2868 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2869 if (result == NULL)
2870 return NULL;
2871 result_s = PyString_AS_STRING(result);
2872 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002873
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002874 /* change everything in-place, starting with this one */
2875 start = result_s + (next-self_s);
2876 *start = to_c;
2877 start++;
2878 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002879
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002880 while (--maxcount > 0) {
2881 next = findchar(start, end-start, from_c);
2882 if (next == NULL)
2883 break;
2884 *next = to_c;
2885 start = next+1;
2886 }
Christian Heimes44720832008-05-26 13:01:01 +00002887
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002888 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002889}
2890
2891/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002892Py_LOCAL(PyStringObject *)
2893replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002894 const char *from_s, Py_ssize_t from_len,
2895 const char *to_s, Py_ssize_t to_len,
2896 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002897{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002898 char *result_s, *start, *end;
2899 char *self_s;
2900 Py_ssize_t self_len, offset;
2901 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002902
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002903 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002904
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002905 self_s = PyString_AS_STRING(self);
2906 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002907
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002908 offset = findstring(self_s, self_len,
2909 from_s, from_len,
2910 0, self_len, FORWARD);
2911 if (offset == -1) {
2912 /* No matches; return the original string */
2913 return return_self(self);
2914 }
Christian Heimes44720832008-05-26 13:01:01 +00002915
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002916 /* Need to make a new string */
2917 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2918 if (result == NULL)
2919 return NULL;
2920 result_s = PyString_AS_STRING(result);
2921 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002922
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002923 /* change everything in-place, starting with this one */
2924 start = result_s + offset;
2925 Py_MEMCPY(start, to_s, from_len);
2926 start += from_len;
2927 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002928
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002929 while ( --maxcount > 0) {
2930 offset = findstring(start, end-start,
2931 from_s, from_len,
2932 0, end-start, FORWARD);
2933 if (offset==-1)
2934 break;
2935 Py_MEMCPY(start+offset, to_s, from_len);
2936 start += offset+from_len;
2937 }
Christian Heimes44720832008-05-26 13:01:01 +00002938
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002939 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002940}
2941
2942/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002943Py_LOCAL(PyStringObject *)
2944replace_single_character(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002945 char from_c,
2946 const char *to_s, Py_ssize_t to_len,
2947 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002948{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002949 char *self_s, *result_s;
2950 char *start, *next, *end;
2951 Py_ssize_t self_len, result_len;
2952 Py_ssize_t count, product;
2953 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002954
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002955 self_s = PyString_AS_STRING(self);
2956 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002957
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002958 count = countchar(self_s, self_len, from_c, maxcount);
2959 if (count == 0) {
2960 /* no matches, return unchanged */
2961 return return_self(self);
2962 }
Christian Heimes44720832008-05-26 13:01:01 +00002963
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002964 /* use the difference between current and new, hence the "-1" */
2965 /* result_len = self_len + count * (to_len-1) */
2966 product = count * (to_len-1);
2967 if (product / (to_len-1) != count) {
2968 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2969 return NULL;
2970 }
2971 result_len = self_len + product;
2972 if (result_len < 0) {
2973 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2974 return NULL;
2975 }
Christian Heimes44720832008-05-26 13:01:01 +00002976
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002977 if ( (result = (PyStringObject *)
2978 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2979 return NULL;
2980 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002981
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002982 start = self_s;
2983 end = self_s + self_len;
2984 while (count-- > 0) {
2985 next = findchar(start, end-start, from_c);
2986 if (next == NULL)
2987 break;
Christian Heimes44720832008-05-26 13:01:01 +00002988
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002989 if (next == start) {
2990 /* replace with the 'to' */
2991 Py_MEMCPY(result_s, to_s, to_len);
2992 result_s += to_len;
2993 start += 1;
2994 } else {
2995 /* copy the unchanged old then the 'to' */
2996 Py_MEMCPY(result_s, start, next-start);
2997 result_s += (next-start);
2998 Py_MEMCPY(result_s, to_s, to_len);
2999 result_s += to_len;
3000 start = next+1;
3001 }
3002 }
3003 /* Copy the remainder of the remaining string */
3004 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00003005
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003006 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003007}
3008
3009/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003010Py_LOCAL(PyStringObject *)
3011replace_substring(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003012 const char *from_s, Py_ssize_t from_len,
3013 const char *to_s, Py_ssize_t to_len,
3014 Py_ssize_t maxcount) {
3015 char *self_s, *result_s;
3016 char *start, *next, *end;
3017 Py_ssize_t self_len, result_len;
3018 Py_ssize_t count, offset, product;
3019 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00003020
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003021 self_s = PyString_AS_STRING(self);
3022 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003023
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003024 count = countstring(self_s, self_len,
3025 from_s, from_len,
3026 0, self_len, FORWARD, maxcount);
3027 if (count == 0) {
3028 /* no matches, return unchanged */
3029 return return_self(self);
3030 }
Christian Heimes44720832008-05-26 13:01:01 +00003031
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003032 /* Check for overflow */
3033 /* result_len = self_len + count * (to_len-from_len) */
3034 product = count * (to_len-from_len);
3035 if (product / (to_len-from_len) != count) {
3036 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3037 return NULL;
3038 }
3039 result_len = self_len + product;
3040 if (result_len < 0) {
3041 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
3042 return NULL;
3043 }
Christian Heimes44720832008-05-26 13:01:01 +00003044
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003045 if ( (result = (PyStringObject *)
3046 PyString_FromStringAndSize(NULL, result_len)) == NULL)
3047 return NULL;
3048 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00003049
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003050 start = self_s;
3051 end = self_s + self_len;
3052 while (count-- > 0) {
3053 offset = findstring(start, end-start,
3054 from_s, from_len,
3055 0, end-start, FORWARD);
3056 if (offset == -1)
3057 break;
3058 next = start+offset;
3059 if (next == start) {
3060 /* replace with the 'to' */
3061 Py_MEMCPY(result_s, to_s, to_len);
3062 result_s += to_len;
3063 start += from_len;
3064 } else {
3065 /* copy the unchanged old then the 'to' */
3066 Py_MEMCPY(result_s, start, next-start);
3067 result_s += (next-start);
3068 Py_MEMCPY(result_s, to_s, to_len);
3069 result_s += to_len;
3070 start = next+from_len;
3071 }
3072 }
3073 /* Copy the remainder of the remaining string */
3074 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00003075
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003076 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003077}
3078
3079
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003080Py_LOCAL(PyStringObject *)
3081replace(PyStringObject *self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003082 const char *from_s, Py_ssize_t from_len,
3083 const char *to_s, Py_ssize_t to_len,
3084 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00003085{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003086 if (maxcount < 0) {
3087 maxcount = PY_SSIZE_T_MAX;
3088 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
3089 /* nothing to do; return the original string */
3090 return return_self(self);
3091 }
Christian Heimes44720832008-05-26 13:01:01 +00003092
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003093 if (maxcount == 0 ||
3094 (from_len == 0 && to_len == 0)) {
3095 /* nothing to do; return the original string */
3096 return return_self(self);
3097 }
Christian Heimes44720832008-05-26 13:01:01 +00003098
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003099 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00003100
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003101 if (from_len == 0) {
3102 /* insert the 'to' string everywhere. */
3103 /* >>> "Python".replace("", ".") */
3104 /* '.P.y.t.h.o.n.' */
3105 return replace_interleave(self, to_s, to_len, maxcount);
3106 }
Christian Heimes44720832008-05-26 13:01:01 +00003107
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003108 /* Except for "".replace("", "A") == "A" there is no way beyond this */
3109 /* point for an empty self string to generate a non-empty string */
3110 /* Special case so the remaining code always gets a non-empty string */
3111 if (PyString_GET_SIZE(self) == 0) {
3112 return return_self(self);
3113 }
Christian Heimes44720832008-05-26 13:01:01 +00003114
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003115 if (to_len == 0) {
3116 /* delete all occurances of 'from' string */
3117 if (from_len == 1) {
3118 return replace_delete_single_character(
3119 self, from_s[0], maxcount);
3120 } else {
3121 return replace_delete_substring(self, from_s, from_len, maxcount);
3122 }
3123 }
Christian Heimes44720832008-05-26 13:01:01 +00003124
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003125 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00003126
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003127 if (from_len == to_len) {
3128 if (from_len == 1) {
3129 return replace_single_character_in_place(
3130 self,
3131 from_s[0],
3132 to_s[0],
3133 maxcount);
3134 } else {
3135 return replace_substring_in_place(
3136 self, from_s, from_len, to_s, to_len, maxcount);
3137 }
3138 }
Christian Heimes44720832008-05-26 13:01:01 +00003139
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003140 /* Otherwise use the more generic algorithms */
3141 if (from_len == 1) {
3142 return replace_single_character(self, from_s[0],
3143 to_s, to_len, maxcount);
3144 } else {
3145 /* len('from')>=2, len('to')>=1 */
3146 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3147 }
Christian Heimes44720832008-05-26 13:01:01 +00003148}
3149
3150PyDoc_STRVAR(replace__doc__,
Ezio Melotti6327bf12010-06-26 18:47:01 +00003151"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003152\n\
3153Return a copy of string S with all occurrences of substring\n\
3154old replaced by new. If the optional argument count is\n\
3155given, only the first count occurrences are replaced.");
3156
3157static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003158string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003159{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003160 Py_ssize_t count = -1;
3161 PyObject *from, *to;
3162 const char *from_s, *to_s;
3163 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00003164
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003165 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3166 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003167
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003168 if (PyString_Check(from)) {
3169 from_s = PyString_AS_STRING(from);
3170 from_len = PyString_GET_SIZE(from);
3171 }
Christian Heimes44720832008-05-26 13:01:01 +00003172#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003173 if (PyUnicode_Check(from))
3174 return PyUnicode_Replace((PyObject *)self,
3175 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00003176#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003177 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3178 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003179
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003180 if (PyString_Check(to)) {
3181 to_s = PyString_AS_STRING(to);
3182 to_len = PyString_GET_SIZE(to);
3183 }
Christian Heimes44720832008-05-26 13:01:01 +00003184#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003185 else if (PyUnicode_Check(to))
3186 return PyUnicode_Replace((PyObject *)self,
3187 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00003188#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003189 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3190 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003191
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003192 return (PyObject *)replace((PyStringObject *) self,
3193 from_s, from_len,
3194 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00003195}
3196
3197/** End DALKE **/
3198
3199/* Matches the end (direction >= 0) or start (direction < 0) of self
3200 * against substr, using the start and end arguments. Returns
3201 * -1 on error, 0 if not found and 1 if found.
3202 */
3203Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003204_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003205 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00003206{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003207 Py_ssize_t len = PyString_GET_SIZE(self);
3208 Py_ssize_t slen;
3209 const char* sub;
3210 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00003211
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003212 if (PyString_Check(substr)) {
3213 sub = PyString_AS_STRING(substr);
3214 slen = PyString_GET_SIZE(substr);
3215 }
Christian Heimes44720832008-05-26 13:01:01 +00003216#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003217 else if (PyUnicode_Check(substr))
3218 return PyUnicode_Tailmatch((PyObject *)self,
3219 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00003220#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003221 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3222 return -1;
3223 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003224
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003225 string_adjust_indices(&start, &end, len);
Christian Heimes44720832008-05-26 13:01:01 +00003226
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003227 if (direction < 0) {
3228 /* startswith */
3229 if (start+slen > len)
3230 return 0;
3231 } else {
3232 /* endswith */
3233 if (end-start < slen || start > len)
3234 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003235
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003236 if (end-slen > start)
3237 start = end - slen;
3238 }
3239 if (end-start >= slen)
3240 return ! memcmp(str+start, sub, slen);
3241 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003242}
3243
3244
3245PyDoc_STRVAR(startswith__doc__,
3246"S.startswith(prefix[, start[, end]]) -> bool\n\
3247\n\
3248Return True if S starts with the specified prefix, False otherwise.\n\
3249With optional start, test S beginning at that position.\n\
3250With optional end, stop comparing S at that position.\n\
3251prefix can also be a tuple of strings to try.");
3252
3253static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003254string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003255{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003256 Py_ssize_t start = 0;
3257 Py_ssize_t end = PY_SSIZE_T_MAX;
3258 PyObject *subobj;
3259 int result;
Christian Heimes44720832008-05-26 13:01:01 +00003260
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003261 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3262 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3263 return NULL;
3264 if (PyTuple_Check(subobj)) {
3265 Py_ssize_t i;
3266 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3267 result = _string_tailmatch(self,
3268 PyTuple_GET_ITEM(subobj, i),
3269 start, end, -1);
3270 if (result == -1)
3271 return NULL;
3272 else if (result) {
3273 Py_RETURN_TRUE;
3274 }
3275 }
3276 Py_RETURN_FALSE;
3277 }
3278 result = _string_tailmatch(self, subobj, start, end, -1);
3279 if (result == -1)
3280 return NULL;
3281 else
3282 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00003283}
3284
3285
3286PyDoc_STRVAR(endswith__doc__,
3287"S.endswith(suffix[, start[, end]]) -> bool\n\
3288\n\
3289Return True if S ends with the specified suffix, False otherwise.\n\
3290With optional start, test S beginning at that position.\n\
3291With optional end, stop comparing S at that position.\n\
3292suffix can also be a tuple of strings to try.");
3293
3294static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003295string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003296{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003297 Py_ssize_t start = 0;
3298 Py_ssize_t end = PY_SSIZE_T_MAX;
3299 PyObject *subobj;
3300 int result;
Christian Heimes44720832008-05-26 13:01:01 +00003301
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003302 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3303 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3304 return NULL;
3305 if (PyTuple_Check(subobj)) {
3306 Py_ssize_t i;
3307 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3308 result = _string_tailmatch(self,
3309 PyTuple_GET_ITEM(subobj, i),
3310 start, end, +1);
3311 if (result == -1)
3312 return NULL;
3313 else if (result) {
3314 Py_RETURN_TRUE;
3315 }
3316 }
3317 Py_RETURN_FALSE;
3318 }
3319 result = _string_tailmatch(self, subobj, start, end, +1);
3320 if (result == -1)
3321 return NULL;
3322 else
3323 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00003324}
3325
3326
3327PyDoc_STRVAR(encode__doc__,
3328"S.encode([encoding[,errors]]) -> object\n\
3329\n\
3330Encodes S using the codec registered for encoding. encoding defaults\n\
3331to the default encoding. errors may be given to set a different error\n\
3332handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3333a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3334'xmlcharrefreplace' as well as any other name registered with\n\
3335codecs.register_error that is able to handle UnicodeEncodeErrors.");
3336
3337static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003338string_encode(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003339{
3340 char *encoding = NULL;
3341 char *errors = NULL;
3342 PyObject *v;
3343
3344 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003345 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003346 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003347 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003348 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003349 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003350 PyErr_Format(PyExc_TypeError,
3351 "encoder did not return a string/unicode object "
3352 "(type=%.400s)",
3353 Py_TYPE(v)->tp_name);
3354 Py_DECREF(v);
3355 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003356 }
3357 return v;
3358
3359 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003360 return NULL;
3361}
3362
Christian Heimes44720832008-05-26 13:01:01 +00003363
3364PyDoc_STRVAR(decode__doc__,
3365"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003366\n\
Christian Heimes44720832008-05-26 13:01:01 +00003367Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003368to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003369handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3370a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003371as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003372able to handle UnicodeDecodeErrors.");
3373
3374static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003375string_decode(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003376{
Christian Heimes44720832008-05-26 13:01:01 +00003377 char *encoding = NULL;
3378 char *errors = NULL;
3379 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003380
3381 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003382 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003383 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003384 if (v == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003385 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003386 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003387 PyErr_Format(PyExc_TypeError,
3388 "decoder did not return a string/unicode object "
3389 "(type=%.400s)",
3390 Py_TYPE(v)->tp_name);
3391 Py_DECREF(v);
3392 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003393 }
3394 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003395
Christian Heimes44720832008-05-26 13:01:01 +00003396 onError:
3397 return NULL;
3398}
3399
3400
3401PyDoc_STRVAR(expandtabs__doc__,
3402"S.expandtabs([tabsize]) -> string\n\
3403\n\
3404Return a copy of S where all tab characters are expanded using spaces.\n\
3405If tabsize is not given, a tab size of 8 characters is assumed.");
3406
3407static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003408string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003409{
3410 const char *e, *p, *qe;
3411 char *q;
3412 Py_ssize_t i, j, incr;
3413 PyObject *u;
3414 int tabsize = 8;
3415
3416 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003417 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003418
3419 /* First pass: determine size of output string */
3420 i = 0; /* chars up to and including most recent \n or \r */
3421 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003422 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3423 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003424 if (*p == '\t') {
3425 if (tabsize > 0) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003426 incr = tabsize - (j % tabsize);
3427 if (j > PY_SSIZE_T_MAX - incr)
3428 goto overflow1;
3429 j += incr;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003430 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003431 }
3432 else {
3433 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003434 goto overflow1;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003435 j++;
3436 if (*p == '\n' || *p == '\r') {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003437 if (i > PY_SSIZE_T_MAX - j)
3438 goto overflow1;
3439 i += j;
3440 j = 0;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003441 }
3442 }
Christian Heimes44720832008-05-26 13:01:01 +00003443
3444 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003445 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003446
3447 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003448 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003449 if (!u)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003450 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003451
3452 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003453 q = PyString_AS_STRING(u); /* next output char */
3454 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003455
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003456 for (p = PyString_AS_STRING(self); p < e; p++)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003457 if (*p == '\t') {
3458 if (tabsize > 0) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003459 i = tabsize - (j % tabsize);
3460 j += i;
3461 while (i--) {
3462 if (q >= qe)
3463 goto overflow2;
3464 *q++ = ' ';
3465 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003466 }
3467 }
3468 else {
3469 if (q >= qe)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003470 goto overflow2;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003471 *q++ = *p;
3472 j++;
3473 if (*p == '\n' || *p == '\r')
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003474 j = 0;
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003475 }
Christian Heimes44720832008-05-26 13:01:01 +00003476
3477 return u;
3478
3479 overflow2:
3480 Py_DECREF(u);
3481 overflow1:
3482 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3483 return NULL;
3484}
3485
3486Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003487pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003488{
3489 PyObject *u;
3490
3491 if (left < 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003492 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003493 if (right < 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003494 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003496 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003497 Py_INCREF(self);
3498 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003499 }
3500
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003501 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003502 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003503 if (u) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003504 if (left)
3505 memset(PyString_AS_STRING(u), fill, left);
3506 Py_MEMCPY(PyString_AS_STRING(u) + left,
3507 PyString_AS_STRING(self),
3508 PyString_GET_SIZE(self));
3509 if (right)
3510 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3511 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003512 }
3513
3514 return u;
3515}
3516
3517PyDoc_STRVAR(ljust__doc__,
3518"S.ljust(width[, fillchar]) -> string\n"
3519"\n"
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003520"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003521"done using the specified fill character (default is a space).");
3522
3523static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003524string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003525{
3526 Py_ssize_t width;
3527 char fillchar = ' ';
3528
3529 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003530 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003531
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003532 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003533 Py_INCREF(self);
3534 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003535 }
3536
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003537 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003538}
3539
3540
3541PyDoc_STRVAR(rjust__doc__,
3542"S.rjust(width[, fillchar]) -> string\n"
3543"\n"
Benjamin Petersonbe2c0a92008-10-04 21:33:08 +00003544"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003545"done using the specified fill character (default is a space)");
3546
3547static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003548string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003549{
3550 Py_ssize_t width;
3551 char fillchar = ' ';
3552
3553 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003554 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003555
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003556 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003557 Py_INCREF(self);
3558 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003559 }
3560
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003561 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003562}
3563
3564
3565PyDoc_STRVAR(center__doc__,
3566"S.center(width[, fillchar]) -> string\n"
3567"\n"
3568"Return S centered in a string of length width. Padding is\n"
3569"done using the specified fill character (default is a space)");
3570
3571static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003572string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003573{
3574 Py_ssize_t marg, left;
3575 Py_ssize_t width;
3576 char fillchar = ' ';
3577
3578 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003579 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003580
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003581 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003582 Py_INCREF(self);
3583 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003584 }
3585
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003586 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003587 left = marg / 2 + (marg & width & 1);
3588
3589 return pad(self, left, marg - left, fillchar);
3590}
3591
3592PyDoc_STRVAR(zfill__doc__,
3593"S.zfill(width) -> string\n"
3594"\n"
3595"Pad a numeric string S with zeros on the left, to fill a field\n"
3596"of the specified width. The string S is never truncated.");
3597
3598static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003599string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003600{
3601 Py_ssize_t fill;
3602 PyObject *s;
3603 char *p;
3604 Py_ssize_t width;
3605
3606 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003607 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003608
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003609 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003610 if (PyString_CheckExact(self)) {
3611 Py_INCREF(self);
3612 return (PyObject*) self;
3613 }
3614 else
3615 return PyString_FromStringAndSize(
3616 PyString_AS_STRING(self),
3617 PyString_GET_SIZE(self)
3618 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003619 }
3620
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003621 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003622
Christian Heimes44720832008-05-26 13:01:01 +00003623 s = pad(self, fill, 0, '0');
3624
3625 if (s == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003626 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003627
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003628 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003629 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003630 /* move sign to beginning of string */
3631 p[0] = p[fill];
3632 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003633 }
3634
3635 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003636}
3637
Christian Heimes44720832008-05-26 13:01:01 +00003638PyDoc_STRVAR(isspace__doc__,
3639"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003640\n\
Christian Heimes44720832008-05-26 13:01:01 +00003641Return True if all characters in S are whitespace\n\
3642and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003643
Christian Heimes44720832008-05-26 13:01:01 +00003644static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003645string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003646{
Christian Heimes44720832008-05-26 13:01:01 +00003647 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003648 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003649 register const unsigned char *e;
3650
3651 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003652 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003653 isspace(*p))
3654 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003655
3656 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003657 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003658 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003659
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003660 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003661 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003662 if (!isspace(*p))
3663 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003664 }
Christian Heimes44720832008-05-26 13:01:01 +00003665 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003666}
3667
Christian Heimes44720832008-05-26 13:01:01 +00003668
3669PyDoc_STRVAR(isalpha__doc__,
3670"S.isalpha() -> bool\n\
3671\n\
3672Return True if all characters in S are alphabetic\n\
3673and there is at least one character in S, False otherwise.");
3674
3675static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003676string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003677{
Christian Heimes44720832008-05-26 13:01:01 +00003678 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003679 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003680 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003681
Christian Heimes44720832008-05-26 13:01:01 +00003682 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003683 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003684 isalpha(*p))
3685 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003686
3687 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003688 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003689 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003690
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003691 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003692 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003693 if (!isalpha(*p))
3694 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003695 }
Christian Heimes44720832008-05-26 13:01:01 +00003696 return PyBool_FromLong(1);
3697}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003698
Christian Heimes44720832008-05-26 13:01:01 +00003699
3700PyDoc_STRVAR(isalnum__doc__,
3701"S.isalnum() -> bool\n\
3702\n\
3703Return True if all characters in S are alphanumeric\n\
3704and there is at least one character in S, False otherwise.");
3705
3706static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003707string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003708{
3709 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003710 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003711 register const unsigned char *e;
3712
3713 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003714 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003715 isalnum(*p))
3716 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003717
3718 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003719 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003720 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003721
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003722 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003723 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003724 if (!isalnum(*p))
3725 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003726 }
3727 return PyBool_FromLong(1);
3728}
3729
3730
3731PyDoc_STRVAR(isdigit__doc__,
3732"S.isdigit() -> bool\n\
3733\n\
3734Return True if all characters in S are digits\n\
3735and there is at least one character in S, False otherwise.");
3736
3737static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003738string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003739{
3740 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003741 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003742 register const unsigned char *e;
3743
3744 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003745 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003746 isdigit(*p))
3747 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003748
3749 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003750 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003751 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003752
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003753 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003754 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003755 if (!isdigit(*p))
3756 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003757 }
3758 return PyBool_FromLong(1);
3759}
3760
3761
3762PyDoc_STRVAR(islower__doc__,
3763"S.islower() -> bool\n\
3764\n\
3765Return True if all cased characters in S are lowercase and there is\n\
3766at least one cased character in S, False otherwise.");
3767
3768static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003769string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003770{
3771 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003772 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003773 register const unsigned char *e;
3774 int cased;
3775
3776 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003777 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003778 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003779
3780 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003781 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003782 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003783
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003784 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003785 cased = 0;
3786 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003787 if (isupper(*p))
3788 return PyBool_FromLong(0);
3789 else if (!cased && islower(*p))
3790 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003791 }
3792 return PyBool_FromLong(cased);
3793}
3794
3795
3796PyDoc_STRVAR(isupper__doc__,
3797"S.isupper() -> bool\n\
3798\n\
3799Return True if all cased characters in S are uppercase and there is\n\
3800at least one cased character in S, False otherwise.");
3801
3802static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003803string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003804{
3805 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003806 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003807 register const unsigned char *e;
3808 int cased;
3809
3810 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003811 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003812 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003813
3814 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003815 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003816 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003817
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003818 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003819 cased = 0;
3820 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003821 if (islower(*p))
3822 return PyBool_FromLong(0);
3823 else if (!cased && isupper(*p))
3824 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003825 }
3826 return PyBool_FromLong(cased);
3827}
3828
3829
3830PyDoc_STRVAR(istitle__doc__,
3831"S.istitle() -> bool\n\
3832\n\
3833Return True if S is a titlecased string and there is at least one\n\
3834character in S, i.e. uppercase characters may only follow uncased\n\
3835characters and lowercase characters only cased ones. Return False\n\
3836otherwise.");
3837
3838static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003839string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003840{
3841 register const unsigned char *p
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003842 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003843 register const unsigned char *e;
3844 int cased, previous_is_cased;
3845
3846 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003847 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003848 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003849
3850 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003851 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003852 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003853
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003854 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003855 cased = 0;
3856 previous_is_cased = 0;
3857 for (; p < e; p++) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003858 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003859
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003860 if (isupper(ch)) {
3861 if (previous_is_cased)
3862 return PyBool_FromLong(0);
3863 previous_is_cased = 1;
3864 cased = 1;
3865 }
3866 else if (islower(ch)) {
3867 if (!previous_is_cased)
3868 return PyBool_FromLong(0);
3869 previous_is_cased = 1;
3870 cased = 1;
3871 }
3872 else
3873 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003874 }
3875 return PyBool_FromLong(cased);
3876}
3877
3878
3879PyDoc_STRVAR(splitlines__doc__,
3880"S.splitlines([keepends]) -> list of strings\n\
3881\n\
3882Return a list of the lines in S, breaking at line boundaries.\n\
3883Line breaks are not included in the resulting list unless keepends\n\
3884is given and true.");
3885
3886static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003887string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003888{
3889 register Py_ssize_t i;
3890 register Py_ssize_t j;
3891 Py_ssize_t len;
3892 int keepends = 0;
3893 PyObject *list;
3894 PyObject *str;
3895 char *data;
3896
3897 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003898 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003899
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003900 data = PyString_AS_STRING(self);
3901 len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003902
3903 /* This does not use the preallocated list because splitlines is
3904 usually run with hundreds of newlines. The overhead of
3905 switching between PyList_SET_ITEM and append causes about a
3906 2-3% slowdown for that common case. A smarter implementation
3907 could move the if check out, so the SET_ITEMs are done first
3908 and the appends only done when the prealloc buffer is full.
3909 That's too much work for little gain.*/
3910
3911 list = PyList_New(0);
3912 if (!list)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003913 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +00003914
3915 for (i = j = 0; i < len; ) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003916 Py_ssize_t eol;
Christian Heimes44720832008-05-26 13:01:01 +00003917
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003918 /* Find a line and append it */
3919 while (i < len && data[i] != '\n' && data[i] != '\r')
3920 i++;
Christian Heimes44720832008-05-26 13:01:01 +00003921
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003922 /* Skip the line break reading CRLF as one line break */
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003923 eol = i;
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003924 if (i < len) {
3925 if (data[i] == '\r' && i + 1 < len &&
3926 data[i+1] == '\n')
3927 i += 2;
3928 else
3929 i++;
3930 if (keepends)
3931 eol = i;
3932 }
3933 SPLIT_APPEND(data, j, eol);
3934 j = i;
Christian Heimes44720832008-05-26 13:01:01 +00003935 }
3936 if (j < len) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003937 SPLIT_APPEND(data, j, len);
Christian Heimes44720832008-05-26 13:01:01 +00003938 }
3939
3940 return list;
3941
3942 onError:
3943 Py_XDECREF(list);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003944 return NULL;
3945}
3946
Robert Schuppenies51df0642008-06-01 16:16:17 +00003947PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003948"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003949
3950static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003951string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003952{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003953 Py_ssize_t res;
3954 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize;
3955 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003956}
3957
Christian Heimes44720832008-05-26 13:01:01 +00003958#undef SPLIT_APPEND
3959#undef SPLIT_ADD
3960#undef MAX_PREALLOC
3961#undef PREALLOC_SIZE
Christian Heimes1a6387e2008-03-26 12:49:49 +00003962
3963static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003964string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003965{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003966 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003967}
3968
Christian Heimes1a6387e2008-03-26 12:49:49 +00003969
Christian Heimes44720832008-05-26 13:01:01 +00003970#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003971
Christian Heimes44720832008-05-26 13:01:01 +00003972PyDoc_STRVAR(format__doc__,
Georg Brandlc5356992010-08-01 22:02:09 +00003973"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003974\n\
3975");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003976
Eric Smithdc13b792008-05-30 18:10:04 +00003977static PyObject *
3978string__format__(PyObject* self, PyObject* args)
3979{
3980 PyObject *format_spec;
3981 PyObject *result = NULL;
3982 PyObject *tmp = NULL;
3983
3984 /* If 2.x, convert format_spec to the same type as value */
3985 /* This is to allow things like u''.format('') */
3986 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003987 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003988 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003989 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3990 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3991 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003992 }
3993 tmp = PyObject_Str(format_spec);
3994 if (tmp == NULL)
Antoine Pitrou96ec48b2010-06-09 16:31:23 +00003995 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003996 format_spec = tmp;
3997
3998 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00003999 PyString_AS_STRING(format_spec),
4000 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00004001done:
4002 Py_XDECREF(tmp);
4003 return result;
4004}
4005
Christian Heimes44720832008-05-26 13:01:01 +00004006PyDoc_STRVAR(p_format__doc__,
Georg Brandlc5356992010-08-01 22:02:09 +00004007"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00004008\n\
4009");
4010
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00004011
/* Method table for the str type; it is installed as tp_methods of
 * PyString_Type.  Each entry gives the Python-visible name, the C
 * implementation, the calling-convention flags and the docstring.
 * Entries with only three initializers leave the docstring slot
 * zero-initialized (no docstring).
 */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* New-style formatting: format() takes positional and keyword
       arguments; the two _formatter_* entries are underscore-prefixed
       helpers registered without a docstring. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL, NULL} /* sentinel */
};
4070
4071static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00004072str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004073
Christian Heimes44720832008-05-26 13:01:01 +00004074static PyObject *
4075string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4076{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004077 PyObject *x = NULL;
4078 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00004079
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004080 if (type != &PyString_Type)
4081 return str_subtype_new(type, args, kwds);
4082 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
4083 return NULL;
4084 if (x == NULL)
4085 return PyString_FromString("");
4086 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00004087}
4088
4089static PyObject *
4090str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4091{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004092 PyObject *tmp, *pnew;
4093 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00004094
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004095 assert(PyType_IsSubtype(type, &PyString_Type));
4096 tmp = string_new(&PyString_Type, args, kwds);
4097 if (tmp == NULL)
4098 return NULL;
4099 assert(PyString_CheckExact(tmp));
4100 n = PyString_GET_SIZE(tmp);
4101 pnew = type->tp_alloc(type, n);
4102 if (pnew != NULL) {
4103 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
4104 ((PyStringObject *)pnew)->ob_shash =
4105 ((PyStringObject *)tmp)->ob_shash;
4106 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
4107 }
4108 Py_DECREF(tmp);
4109 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00004110}
4111
4112static PyObject *
4113basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
4114{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004115 PyErr_SetString(PyExc_TypeError,
4116 "The basestring type cannot be instantiated");
4117 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004118}
4119
4120static PyObject *
4121string_mod(PyObject *v, PyObject *w)
4122{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004123 if (!PyString_Check(v)) {
4124 Py_INCREF(Py_NotImplemented);
4125 return Py_NotImplemented;
4126 }
4127 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00004128}
4129
4130PyDoc_STRVAR(basestring_doc,
4131"Type basestring cannot be instantiated; it is the base for str and unicode.");
4132
/* Number protocol table for str.  Only nb_remainder is provided (the `%`
 * formatting operator, see string_mod); the slots listed before it are
 * explicitly 0 and the remaining ones are left zero-initialized.
 */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
4140
4141
/* Type object for `basestring`.
 *
 * The type allows subclassing (Py_TPFLAGS_BASETYPE) and derives from
 * PyBaseObject_Type; its tp_new is basestring_new, which always raises
 * TypeError, so the type itself cannot be instantiated.  All other slots
 * are 0.
 */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
4183
4184PyDoc_STRVAR(string_doc,
4185"str(object) -> string\n\
4186\n\
4187Return a nice string representation of the object.\n\
4188If the argument is a string, the return value is the same object.");
4189
/* Type object for `str`.
 *
 * A variable-size type: each instance is a PyStringObject followed by
 * items of sizeof(char).  It derives from PyBaseString_Type, may itself
 * be subclassed (Py_TPFLAGS_BASETYPE), and wires in the number, sequence,
 * mapping and buffer protocol tables together with string_methods.
 * Instances are created by string_new and released with PyObject_Del.
 */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
4233
4234void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004235PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004236{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004237 register PyObject *v;
4238 if (*pv == NULL)
4239 return;
4240 if (w == NULL || !PyString_Check(*pv)) {
4241 Py_DECREF(*pv);
4242 *pv = NULL;
4243 return;
4244 }
4245 v = string_concat((PyStringObject *) *pv, w);
4246 Py_DECREF(*pv);
4247 *pv = v;
Christian Heimes44720832008-05-26 13:01:01 +00004248}
4249
4250void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004251PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00004252{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004253 PyString_Concat(pv, w);
4254 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00004255}
4256
4257
4258/* The following function breaks the notion that strings are immutable:
4259 it changes the size of a string. We get away with this only if there
4260 is only one module referencing the object. You can also think of it
4261 as creating a new string object and destroying the old one, only
4262 more efficiently. In any case, don't use this if the string may
4263 already be known to some other part of the code...
4264 Note that if there's not enough memory to resize the string, the original
4265 string object at *pv is deallocated, *pv is set to NULL, an "out of
4266 memory" exception is set, and -1 is returned. Else (on success) 0 is
4267 returned, and the value in *pv may or may not be the same as on input.
4268 As always, an extra byte is allocated for a trailing \0 byte (newsize
4269 does *not* include that), and a trailing \0 byte is stored.
4270*/
4271
/* Resize the string object at *pv in place to hold `newsize` bytes.
 *
 * pv       address of the reference to resize; updated on return
 * newsize  new length in bytes, not counting the trailing NUL
 *
 * Returns 0 on success (*pv may have changed) and -1 on failure with *pv
 * set to NULL and an exception set.
 */
int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
    register PyObject *v;
    register PyStringObject *sv;
    v = *pv;
    /* Resizing is only legal for a non-interned string with exactly one
       reference and a non-negative new size; anything else is treated as
       an internal error and the caller's reference is dropped. */
    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
        PyString_CHECK_INTERNED(v)) {
        *pv = 0;
        Py_DECREF(v);
        PyErr_BadInternalCall();
        return -1;
    }
    /* XXX UNREF/NEWREF interface should be more symmetrical */
    /* Unregister the object from the reference bookkeeping before the
       reallocation; it is registered again below with _Py_NewReference. */
    _Py_DEC_REFTOTAL;
    _Py_ForgetReference(v);
    *pv = (PyObject *)
        PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
    if (*pv == NULL) {
        /* Reallocation failed: free the old block and report no memory. */
        PyObject_Del(v);
        PyErr_NoMemory();
        return -1;
    }
    _Py_NewReference(*pv);
    sv = (PyStringObject *) *pv;
    Py_SIZE(sv) = newsize;
    sv->ob_sval[newsize] = '\0';
    sv->ob_shash = -1;          /* invalidate cached hash value */
    return 0;
}
4302
4303/* Helpers for formatstring */
4304
4305Py_LOCAL_INLINE(PyObject *)
4306getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4307{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004308 Py_ssize_t argidx = *p_argidx;
4309 if (argidx < arglen) {
4310 (*p_argidx)++;
4311 if (arglen < 0)
4312 return args;
4313 else
4314 return PyTuple_GetItem(args, argidx);
4315 }
4316 PyErr_SetString(PyExc_TypeError,
4317 "not enough arguments for format string");
4318 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004319}
4320
4321/* Format codes
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004322 * F_LJUST '-'
4323 * F_SIGN '+'
4324 * F_BLANK ' '
4325 * F_ALT '#'
4326 * F_ZERO '0'
Christian Heimes44720832008-05-26 13:01:01 +00004327 */
4328#define F_LJUST (1<<0)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004329#define F_SIGN (1<<1)
Christian Heimes44720832008-05-26 13:01:01 +00004330#define F_BLANK (1<<2)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004331#define F_ALT (1<<3)
4332#define F_ZERO (1<<4)
Christian Heimes44720832008-05-26 13:01:01 +00004333
4334Py_LOCAL_INLINE(int)
4335formatfloat(char *buf, size_t buflen, int flags,
4336 int prec, int type, PyObject *v)
4337{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004338 /* fmt = '%#.' + `prec` + `type`
4339 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4340 char fmt[20];
4341 double x;
4342 x = PyFloat_AsDouble(v);
4343 if (x == -1.0 && PyErr_Occurred()) {
4344 PyErr_Format(PyExc_TypeError, "float argument required, "
4345 "not %.200s", Py_TYPE(v)->tp_name);
4346 return -1;
4347 }
4348 if (prec < 0)
4349 prec = 6;
Mark Dickinson75be68b2009-08-28 20:57:42 +00004350#if SIZEOF_INT > 4
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004351 /* make sure that the decimal representation of precision really does
4352 need at most 10 digits: platforms with sizeof(int) == 8 exist! */
4353 if (prec > 0x7fffffff) {
4354 PyErr_SetString(PyExc_OverflowError,
4355 "outrageously large precision "
4356 "for formatted float");
4357 return -1;
4358 }
Mark Dickinson75be68b2009-08-28 20:57:42 +00004359#endif
Mark Dickinson87886192009-03-29 16:18:33 +00004360
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004361 if (type == 'f' && fabs(x) >= 1e50)
4362 type = 'g';
4363 /* Worst case length calc to ensure no buffer overrun:
Christian Heimes44720832008-05-26 13:01:01 +00004364
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004365 'g' formats:
4366 fmt = %#.<prec>g
4367 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4368 for any double rep.)
4369 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Christian Heimes44720832008-05-26 13:01:01 +00004370
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004371 'f' formats:
4372 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4373 len = 1 + 50 + 1 + prec = 52 + prec
Christian Heimes44720832008-05-26 13:01:01 +00004374
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004375 If prec=0 the effective precision is 1 (the leading digit is
4376 always given), therefore increase the length by one.
Christian Heimes44720832008-05-26 13:01:01 +00004377
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004378 */
4379 if (((type == 'g' || type == 'G') &&
4380 buflen <= (size_t)10 + (size_t)prec) ||
4381 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4382 PyErr_SetString(PyExc_OverflowError,
4383 "formatted float is too long (precision too large?)");
4384 return -1;
4385 }
4386 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4387 (flags&F_ALT) ? "#" : "",
4388 prec, type);
4389 PyOS_ascii_formatd(buf, buflen, fmt, x);
4390 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004391}
4392
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004393/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00004394 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4395 * Python's regular ints.
4396 * Return value: a new PyString*, or NULL if error.
4397 * . *pbuf is set to point into it,
4398 * *plen set to the # of chars following that.
4399 * Caller must decref it when done using pbuf.
4400 * The string starting at *pbuf is of the form
4401 * "-"? ("0x" | "0X")? digit+
4402 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4403 * set in flags. The case of hex digits will be correct,
4404 * There will be at least prec digits, zero-filled on the left if
4405 * necessary to get that many.
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004406 * val object to be converted
4407 * flags bitmask of format flags; only F_ALT is looked at
4408 * prec minimum number of digits; 0-fill on left if needed
4409 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00004410 *
4411 * CAUTION: o, x and X conversions on regular ints can never
4412 * produce a '-' sign, but can for Python's unbounded ints.
4413 */
4414PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004415_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004416 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004417{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004418 PyObject *result = NULL;
4419 char *buf;
4420 Py_ssize_t i;
4421 int sign; /* 1 if '-', else 0 */
4422 int len; /* number of characters */
4423 Py_ssize_t llen;
4424 int numdigits; /* len == numnondigits + numdigits */
4425 int numnondigits = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004426
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004427 switch (type) {
4428 case 'd':
4429 case 'u':
4430 result = Py_TYPE(val)->tp_str(val);
4431 break;
4432 case 'o':
4433 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4434 break;
4435 case 'x':
4436 case 'X':
4437 numnondigits = 2;
4438 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4439 break;
4440 default:
4441 assert(!"'type' not in [duoxX]");
4442 }
4443 if (!result)
4444 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004445
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004446 buf = PyString_AsString(result);
4447 if (!buf) {
4448 Py_DECREF(result);
4449 return NULL;
4450 }
Christian Heimes44720832008-05-26 13:01:01 +00004451
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004452 /* To modify the string in-place, there can only be one reference. */
4453 if (Py_REFCNT(result) != 1) {
4454 PyErr_BadInternalCall();
4455 return NULL;
4456 }
4457 llen = PyString_Size(result);
4458 if (llen > INT_MAX) {
4459 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4460 return NULL;
4461 }
4462 len = (int)llen;
4463 if (buf[len-1] == 'L') {
4464 --len;
4465 buf[len] = '\0';
4466 }
4467 sign = buf[0] == '-';
4468 numnondigits += sign;
4469 numdigits = len - numnondigits;
4470 assert(numdigits > 0);
Christian Heimes44720832008-05-26 13:01:01 +00004471
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004472 /* Get rid of base marker unless F_ALT */
4473 if ((flags & F_ALT) == 0) {
4474 /* Need to skip 0x, 0X or 0. */
4475 int skipped = 0;
4476 switch (type) {
4477 case 'o':
4478 assert(buf[sign] == '0');
4479 /* If 0 is only digit, leave it alone. */
4480 if (numdigits > 1) {
4481 skipped = 1;
4482 --numdigits;
4483 }
4484 break;
4485 case 'x':
4486 case 'X':
4487 assert(buf[sign] == '0');
4488 assert(buf[sign + 1] == 'x');
4489 skipped = 2;
4490 numnondigits -= 2;
4491 break;
4492 }
4493 if (skipped) {
4494 buf += skipped;
4495 len -= skipped;
4496 if (sign)
4497 buf[0] = '-';
4498 }
4499 assert(len == numnondigits + numdigits);
4500 assert(numdigits > 0);
4501 }
Christian Heimes44720832008-05-26 13:01:01 +00004502
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004503 /* Fill with leading zeroes to meet minimum width. */
4504 if (prec > numdigits) {
4505 PyObject *r1 = PyString_FromStringAndSize(NULL,
4506 numnondigits + prec);
4507 char *b1;
4508 if (!r1) {
4509 Py_DECREF(result);
4510 return NULL;
4511 }
4512 b1 = PyString_AS_STRING(r1);
4513 for (i = 0; i < numnondigits; ++i)
4514 *b1++ = *buf++;
4515 for (i = 0; i < prec - numdigits; i++)
4516 *b1++ = '0';
4517 for (i = 0; i < numdigits; i++)
4518 *b1++ = *buf++;
4519 *b1 = '\0';
4520 Py_DECREF(result);
4521 result = r1;
4522 buf = PyString_AS_STRING(result);
4523 len = numnondigits + prec;
4524 }
Christian Heimes44720832008-05-26 13:01:01 +00004525
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004526 /* Fix up case for hex conversions. */
4527 if (type == 'X') {
4528 /* Need to convert all lower case letters to upper case.
4529 and need to convert 0x to 0X (and -0x to -0X). */
4530 for (i = 0; i < len; i++)
4531 if (buf[i] >= 'a' && buf[i] <= 'x')
4532 buf[i] -= 'a'-'A';
4533 }
4534 *pbuf = buf;
4535 *plen = len;
4536 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004537}
4538
4539Py_LOCAL_INLINE(int)
4540formatint(char *buf, size_t buflen, int flags,
4541 int prec, int type, PyObject *v)
4542{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004543 /* fmt = '%#.' + `prec` + 'l' + `type`
4544 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4545 + 1 + 1 = 24 */
4546 char fmt[64]; /* plenty big enough! */
4547 char *sign;
4548 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004549
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004550 x = PyInt_AsLong(v);
4551 if (x == -1 && PyErr_Occurred()) {
4552 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4553 Py_TYPE(v)->tp_name);
4554 return -1;
4555 }
4556 if (x < 0 && type == 'u') {
4557 type = 'd';
4558 }
4559 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4560 sign = "-";
4561 else
4562 sign = "";
4563 if (prec < 0)
4564 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004565
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004566 if ((flags & F_ALT) &&
4567 (type == 'x' || type == 'X')) {
4568 /* When converting under %#x or %#X, there are a number
4569 * of issues that cause pain:
4570 * - when 0 is being converted, the C standard leaves off
4571 * the '0x' or '0X', which is inconsistent with other
4572 * %#x/%#X conversions and inconsistent with Python's
4573 * hex() function
4574 * - there are platforms that violate the standard and
4575 * convert 0 with the '0x' or '0X'
4576 * (Metrowerks, Compaq Tru64)
4577 * - there are platforms that give '0x' when converting
4578 * under %#X, but convert 0 in accordance with the
4579 * standard (OS/2 EMX)
4580 *
4581 * We can achieve the desired consistency by inserting our
4582 * own '0x' or '0X' prefix, and substituting %x/%X in place
4583 * of %#x/%#X.
4584 *
4585 * Note that this is the same approach as used in
4586 * formatint() in unicodeobject.c
4587 */
4588 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4589 sign, type, prec, type);
4590 }
4591 else {
4592 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4593 sign, (flags&F_ALT) ? "#" : "",
4594 prec, type);
4595 }
Christian Heimes44720832008-05-26 13:01:01 +00004596
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004597 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4598 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4599 */
4600 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4601 PyErr_SetString(PyExc_OverflowError,
4602 "formatted integer is too long (precision too large?)");
4603 return -1;
4604 }
4605 if (sign[0])
4606 PyOS_snprintf(buf, buflen, fmt, -x);
4607 else
4608 PyOS_snprintf(buf, buflen, fmt, x);
4609 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004610}
4611
4612Py_LOCAL_INLINE(int)
4613formatchar(char *buf, size_t buflen, PyObject *v)
4614{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004615 /* presume that the buffer is at least 2 characters long */
4616 if (PyString_Check(v)) {
4617 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4618 return -1;
4619 }
4620 else {
4621 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4622 return -1;
4623 }
4624 buf[1] = '\0';
4625 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004626}
4627
4628/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4629
4630 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4631 chars are formatted. XXX This is a magic number. Each formatting
4632 routine does bounds checking to ensure no overflow, but a better
4633 solution may be to malloc a buffer of appropriate size for each
4634 format. For now, the current solution is sufficient.
4635*/
4636#define FORMATBUFLEN (size_t)120
4637
4638PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004639PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004640{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004641 char *fmt, *res;
4642 Py_ssize_t arglen, argidx;
4643 Py_ssize_t reslen, rescnt, fmtcnt;
4644 int args_owned = 0;
4645 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004646#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004647 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004648#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004649 PyObject *dict = NULL;
4650 if (format == NULL || !PyString_Check(format) || args == NULL) {
4651 PyErr_BadInternalCall();
4652 return NULL;
4653 }
4654 orig_args = args;
4655 fmt = PyString_AS_STRING(format);
4656 fmtcnt = PyString_GET_SIZE(format);
4657 reslen = rescnt = fmtcnt + 100;
4658 result = PyString_FromStringAndSize((char *)NULL, reslen);
4659 if (result == NULL)
4660 return NULL;
4661 res = PyString_AsString(result);
4662 if (PyTuple_Check(args)) {
4663 arglen = PyTuple_GET_SIZE(args);
4664 argidx = 0;
4665 }
4666 else {
4667 arglen = -1;
4668 argidx = -2;
4669 }
4670 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args) &&
4671 !PyObject_TypeCheck(args, &PyBaseString_Type))
4672 dict = args;
4673 while (--fmtcnt >= 0) {
4674 if (*fmt != '%') {
4675 if (--rescnt < 0) {
4676 rescnt = fmtcnt + 100;
4677 reslen += rescnt;
4678 if (_PyString_Resize(&result, reslen) < 0)
4679 return NULL;
4680 res = PyString_AS_STRING(result)
4681 + reslen - rescnt;
4682 --rescnt;
4683 }
4684 *res++ = *fmt++;
4685 }
4686 else {
4687 /* Got a format specifier */
4688 int flags = 0;
4689 Py_ssize_t width = -1;
4690 int prec = -1;
4691 int c = '\0';
4692 int fill;
4693 int isnumok;
4694 PyObject *v = NULL;
4695 PyObject *temp = NULL;
4696 char *pbuf;
4697 int sign;
4698 Py_ssize_t len;
4699 char formatbuf[FORMATBUFLEN];
4700 /* For format{float,int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004701#ifdef Py_USING_UNICODE
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004702 char *fmt_start = fmt;
4703 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004704#endif
4705
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004706 fmt++;
4707 if (*fmt == '(') {
4708 char *keystart;
4709 Py_ssize_t keylen;
4710 PyObject *key;
4711 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004712
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004713 if (dict == NULL) {
4714 PyErr_SetString(PyExc_TypeError,
4715 "format requires a mapping");
4716 goto error;
4717 }
4718 ++fmt;
4719 --fmtcnt;
4720 keystart = fmt;
4721 /* Skip over balanced parentheses */
4722 while (pcount > 0 && --fmtcnt >= 0) {
4723 if (*fmt == ')')
4724 --pcount;
4725 else if (*fmt == '(')
4726 ++pcount;
4727 fmt++;
4728 }
4729 keylen = fmt - keystart - 1;
4730 if (fmtcnt < 0 || pcount > 0) {
4731 PyErr_SetString(PyExc_ValueError,
4732 "incomplete format key");
4733 goto error;
4734 }
4735 key = PyString_FromStringAndSize(keystart,
4736 keylen);
4737 if (key == NULL)
4738 goto error;
4739 if (args_owned) {
4740 Py_DECREF(args);
4741 args_owned = 0;
4742 }
4743 args = PyObject_GetItem(dict, key);
4744 Py_DECREF(key);
4745 if (args == NULL) {
4746 goto error;
4747 }
4748 args_owned = 1;
4749 arglen = -1;
4750 argidx = -2;
4751 }
4752 while (--fmtcnt >= 0) {
4753 switch (c = *fmt++) {
4754 case '-': flags |= F_LJUST; continue;
4755 case '+': flags |= F_SIGN; continue;
4756 case ' ': flags |= F_BLANK; continue;
4757 case '#': flags |= F_ALT; continue;
4758 case '0': flags |= F_ZERO; continue;
4759 }
4760 break;
4761 }
4762 if (c == '*') {
4763 v = getnextarg(args, arglen, &argidx);
4764 if (v == NULL)
4765 goto error;
4766 if (!PyInt_Check(v)) {
4767 PyErr_SetString(PyExc_TypeError,
4768 "* wants int");
4769 goto error;
4770 }
4771 width = PyInt_AsLong(v);
4772 if (width < 0) {
4773 flags |= F_LJUST;
4774 width = -width;
4775 }
4776 if (--fmtcnt >= 0)
4777 c = *fmt++;
4778 }
4779 else if (c >= 0 && isdigit(c)) {
4780 width = c - '0';
4781 while (--fmtcnt >= 0) {
4782 c = Py_CHARMASK(*fmt++);
4783 if (!isdigit(c))
4784 break;
4785 if ((width*10) / 10 != width) {
4786 PyErr_SetString(
4787 PyExc_ValueError,
4788 "width too big");
4789 goto error;
4790 }
4791 width = width*10 + (c - '0');
4792 }
4793 }
4794 if (c == '.') {
4795 prec = 0;
4796 if (--fmtcnt >= 0)
4797 c = *fmt++;
4798 if (c == '*') {
4799 v = getnextarg(args, arglen, &argidx);
4800 if (v == NULL)
4801 goto error;
4802 if (!PyInt_Check(v)) {
4803 PyErr_SetString(
4804 PyExc_TypeError,
4805 "* wants int");
4806 goto error;
4807 }
4808 prec = PyInt_AsLong(v);
4809 if (prec < 0)
4810 prec = 0;
4811 if (--fmtcnt >= 0)
4812 c = *fmt++;
4813 }
4814 else if (c >= 0 && isdigit(c)) {
4815 prec = c - '0';
4816 while (--fmtcnt >= 0) {
4817 c = Py_CHARMASK(*fmt++);
4818 if (!isdigit(c))
4819 break;
4820 if ((prec*10) / 10 != prec) {
4821 PyErr_SetString(
4822 PyExc_ValueError,
4823 "prec too big");
4824 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004825 }
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00004826 prec = prec*10 + (c - '0');
4827 }
4828 }
4829 } /* prec */
4830 if (fmtcnt >= 0) {
4831 if (c == 'h' || c == 'l' || c == 'L') {
4832 if (--fmtcnt >= 0)
4833 c = *fmt++;
4834 }
4835 }
4836 if (fmtcnt < 0) {
4837 PyErr_SetString(PyExc_ValueError,
4838 "incomplete format");
4839 goto error;
4840 }
4841 if (c != '%') {
4842 v = getnextarg(args, arglen, &argidx);
4843 if (v == NULL)
4844 goto error;
4845 }
4846 sign = 0;
4847 fill = ' ';
4848 switch (c) {
4849 case '%':
4850 pbuf = "%";
4851 len = 1;
4852 break;
4853 case 's':
4854#ifdef Py_USING_UNICODE
4855 if (PyUnicode_Check(v)) {
4856 fmt = fmt_start;
4857 argidx = argidx_start;
4858 goto unicode;
4859 }
4860#endif
4861 temp = _PyObject_Str(v);
4862#ifdef Py_USING_UNICODE
4863 if (temp != NULL && PyUnicode_Check(temp)) {
4864 Py_DECREF(temp);
4865 fmt = fmt_start;
4866 argidx = argidx_start;
4867 goto unicode;
4868 }
4869#endif
4870 /* Fall through */
4871 case 'r':
4872 if (c == 'r')
4873 temp = PyObject_Repr(v);
4874 if (temp == NULL)
4875 goto error;
4876 if (!PyString_Check(temp)) {
4877 PyErr_SetString(PyExc_TypeError,
4878 "%s argument has non-string str()");
4879 Py_DECREF(temp);
4880 goto error;
4881 }
4882 pbuf = PyString_AS_STRING(temp);
4883 len = PyString_GET_SIZE(temp);
4884 if (prec >= 0 && len > prec)
4885 len = prec;
4886 break;
4887 case 'i':
4888 case 'd':
4889 case 'u':
4890 case 'o':
4891 case 'x':
4892 case 'X':
4893 if (c == 'i')
4894 c = 'd';
4895 isnumok = 0;
4896 if (PyNumber_Check(v)) {
4897 PyObject *iobj=NULL;
4898
4899 if (PyInt_Check(v) || (PyLong_Check(v))) {
4900 iobj = v;
4901 Py_INCREF(iobj);
4902 }
4903 else {
4904 iobj = PyNumber_Int(v);
4905 if (iobj==NULL) iobj = PyNumber_Long(v);
4906 }
4907 if (iobj!=NULL) {
4908 if (PyInt_Check(iobj)) {
4909 isnumok = 1;
4910 pbuf = formatbuf;
4911 len = formatint(pbuf,
4912 sizeof(formatbuf),
4913 flags, prec, c, iobj);
4914 Py_DECREF(iobj);
4915 if (len < 0)
4916 goto error;
4917 sign = 1;
4918 }
4919 else if (PyLong_Check(iobj)) {
4920 int ilen;
4921
4922 isnumok = 1;
4923 temp = _PyString_FormatLong(iobj, flags,
4924 prec, c, &pbuf, &ilen);
4925 Py_DECREF(iobj);
4926 len = ilen;
4927 if (!temp)
4928 goto error;
4929 sign = 1;
4930 }
4931 else {
4932 Py_DECREF(iobj);
4933 }
4934 }
4935 }
4936 if (!isnumok) {
4937 PyErr_Format(PyExc_TypeError,
4938 "%%%c format: a number is required, "
4939 "not %.200s", c, Py_TYPE(v)->tp_name);
4940 goto error;
4941 }
4942 if (flags & F_ZERO)
4943 fill = '0';
4944 break;
4945 case 'e':
4946 case 'E':
4947 case 'f':
4948 case 'F':
4949 case 'g':
4950 case 'G':
4951 if (c == 'F')
4952 c = 'f';
4953 pbuf = formatbuf;
4954 len = formatfloat(pbuf, sizeof(formatbuf),
4955 flags, prec, c, v);
4956 if (len < 0)
4957 goto error;
4958 sign = 1;
4959 if (flags & F_ZERO)
4960 fill = '0';
4961 break;
4962 case 'c':
4963#ifdef Py_USING_UNICODE
4964 if (PyUnicode_Check(v)) {
4965 fmt = fmt_start;
4966 argidx = argidx_start;
4967 goto unicode;
4968 }
4969#endif
4970 pbuf = formatbuf;
4971 len = formatchar(pbuf, sizeof(formatbuf), v);
4972 if (len < 0)
4973 goto error;
4974 break;
4975 default:
4976 PyErr_Format(PyExc_ValueError,
4977 "unsupported format character '%c' (0x%x) "
4978 "at index %zd",
4979 c, c,
4980 (Py_ssize_t)(fmt - 1 -
4981 PyString_AsString(format)));
4982 goto error;
4983 }
4984 if (sign) {
4985 if (*pbuf == '-' || *pbuf == '+') {
4986 sign = *pbuf++;
4987 len--;
4988 }
4989 else if (flags & F_SIGN)
4990 sign = '+';
4991 else if (flags & F_BLANK)
4992 sign = ' ';
4993 else
4994 sign = 0;
4995 }
4996 if (width < len)
4997 width = len;
4998 if (rescnt - (sign != 0) < width) {
4999 reslen -= rescnt;
5000 rescnt = width + fmtcnt + 100;
5001 reslen += rescnt;
5002 if (reslen < 0) {
5003 Py_DECREF(result);
5004 Py_XDECREF(temp);
5005 return PyErr_NoMemory();
5006 }
5007 if (_PyString_Resize(&result, reslen) < 0) {
5008 Py_XDECREF(temp);
5009 return NULL;
5010 }
5011 res = PyString_AS_STRING(result)
5012 + reslen - rescnt;
5013 }
5014 if (sign) {
5015 if (fill != ' ')
5016 *res++ = sign;
5017 rescnt--;
5018 if (width > len)
5019 width--;
5020 }
5021 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
5022 assert(pbuf[0] == '0');
5023 assert(pbuf[1] == c);
5024 if (fill != ' ') {
5025 *res++ = *pbuf++;
5026 *res++ = *pbuf++;
5027 }
5028 rescnt -= 2;
5029 width -= 2;
5030 if (width < 0)
5031 width = 0;
5032 len -= 2;
5033 }
5034 if (width > len && !(flags & F_LJUST)) {
5035 do {
5036 --rescnt;
5037 *res++ = fill;
5038 } while (--width > len);
5039 }
5040 if (fill == ' ') {
5041 if (sign)
5042 *res++ = sign;
5043 if ((flags & F_ALT) &&
5044 (c == 'x' || c == 'X')) {
5045 assert(pbuf[0] == '0');
5046 assert(pbuf[1] == c);
5047 *res++ = *pbuf++;
5048 *res++ = *pbuf++;
5049 }
5050 }
5051 Py_MEMCPY(res, pbuf, len);
5052 res += len;
5053 rescnt -= len;
5054 while (--width >= len) {
5055 --rescnt;
5056 *res++ = ' ';
5057 }
5058 if (dict && (argidx < arglen) && c != '%') {
5059 PyErr_SetString(PyExc_TypeError,
5060 "not all arguments converted during string formatting");
5061 Py_XDECREF(temp);
5062 goto error;
5063 }
5064 Py_XDECREF(temp);
5065 } /* '%' */
5066 } /* until end */
5067 if (argidx < arglen && !dict) {
5068 PyErr_SetString(PyExc_TypeError,
5069 "not all arguments converted during string formatting");
5070 goto error;
5071 }
5072 if (args_owned) {
5073 Py_DECREF(args);
5074 }
5075 _PyString_Resize(&result, reslen - rescnt);
5076 return result;
Christian Heimes44720832008-05-26 13:01:01 +00005077
5078#ifdef Py_USING_UNICODE
5079 unicode:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005080 if (args_owned) {
5081 Py_DECREF(args);
5082 args_owned = 0;
5083 }
5084 /* Fiddle args right (remove the first argidx arguments) */
5085 if (PyTuple_Check(orig_args) && argidx > 0) {
5086 PyObject *v;
5087 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
5088 v = PyTuple_New(n);
5089 if (v == NULL)
5090 goto error;
5091 while (--n >= 0) {
5092 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
5093 Py_INCREF(w);
5094 PyTuple_SET_ITEM(v, n, w);
5095 }
5096 args = v;
5097 } else {
5098 Py_INCREF(orig_args);
5099 args = orig_args;
5100 }
5101 args_owned = 1;
5102 /* Take what we have of the result and let the Unicode formatting
5103 function format the rest of the input. */
5104 rescnt = res - PyString_AS_STRING(result);
5105 if (_PyString_Resize(&result, rescnt))
5106 goto error;
5107 fmtcnt = PyString_GET_SIZE(format) - \
5108 (fmt - PyString_AS_STRING(format));
5109 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
5110 if (format == NULL)
5111 goto error;
5112 v = PyUnicode_Format(format, args);
5113 Py_DECREF(format);
5114 if (v == NULL)
5115 goto error;
5116 /* Paste what we have (result) to what the Unicode formatting
5117 function returned (v) and return the result (or error) */
5118 w = PyUnicode_Concat(result, v);
5119 Py_DECREF(result);
5120 Py_DECREF(v);
5121 Py_DECREF(args);
5122 return w;
Christian Heimes44720832008-05-26 13:01:01 +00005123#endif /* Py_USING_UNICODE */
5124
5125 error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005126 Py_DECREF(result);
5127 if (args_owned) {
5128 Py_DECREF(args);
5129 }
5130 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00005131}
5132
5133void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005134PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005135{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005136 register PyStringObject *s = (PyStringObject *)(*p);
5137 PyObject *t;
5138 if (s == NULL || !PyString_Check(s))
5139 Py_FatalError("PyString_InternInPlace: strings only please!");
5140 /* If it's a string subclass, we don't really know what putting
5141 it in the interned dict might do. */
5142 if (!PyString_CheckExact(s))
5143 return;
5144 if (PyString_CHECK_INTERNED(s))
5145 return;
5146 if (interned == NULL) {
5147 interned = PyDict_New();
5148 if (interned == NULL) {
5149 PyErr_Clear(); /* Don't leave an exception */
5150 return;
5151 }
5152 }
5153 t = PyDict_GetItem(interned, (PyObject *)s);
5154 if (t) {
5155 Py_INCREF(t);
5156 Py_DECREF(*p);
5157 *p = t;
5158 return;
5159 }
Christian Heimes44720832008-05-26 13:01:01 +00005160
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005161 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
5162 PyErr_Clear();
5163 return;
5164 }
5165 /* The two references in interned are not counted by refcnt.
5166 The string deallocator will take care of this */
5167 Py_REFCNT(s) -= 2;
5168 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00005169}
5170
5171void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005172PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00005173{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005174 PyString_InternInPlace(p);
5175 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
5176 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
5177 Py_INCREF(*p);
5178 }
Christian Heimes44720832008-05-26 13:01:01 +00005179}
5180
5181
5182PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005183PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00005184{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005185 PyObject *s = PyString_FromString(cp);
5186 if (s == NULL)
5187 return NULL;
5188 PyString_InternInPlace(&s);
5189 return s;
Christian Heimes44720832008-05-26 13:01:01 +00005190}
5191
5192void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00005193PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00005194{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005195 int i;
5196 for (i = 0; i < UCHAR_MAX + 1; i++) {
5197 Py_XDECREF(characters[i]);
5198 characters[i] = NULL;
5199 }
5200 Py_XDECREF(nullstring);
5201 nullstring = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00005202}
5203
5204void _Py_ReleaseInternedStrings(void)
5205{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005206 PyObject *keys;
5207 PyStringObject *s;
5208 Py_ssize_t i, n;
5209 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00005210
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005211 if (interned == NULL || !PyDict_Check(interned))
5212 return;
5213 keys = PyDict_Keys(interned);
5214 if (keys == NULL || !PyList_Check(keys)) {
5215 PyErr_Clear();
5216 return;
5217 }
Christian Heimes44720832008-05-26 13:01:01 +00005218
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005219 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
5220 detector, interned strings are not forcibly deallocated; rather, we
5221 give them their stolen references back, and then clear and DECREF
5222 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00005223
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00005224 n = PyList_GET_SIZE(keys);
5225 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
5226 n);
5227 for (i = 0; i < n; i++) {
5228 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
5229 switch (s->ob_sstate) {
5230 case SSTATE_NOT_INTERNED:
5231 /* XXX Shouldn't happen */
5232 break;
5233 case SSTATE_INTERNED_IMMORTAL:
5234 Py_REFCNT(s) += 1;
5235 immortal_size += Py_SIZE(s);
5236 break;
5237 case SSTATE_INTERNED_MORTAL:
5238 Py_REFCNT(s) += 2;
5239 mortal_size += Py_SIZE(s);
5240 break;
5241 default:
5242 Py_FatalError("Inconsistent interned string state.");
5243 }
5244 s->ob_sstate = SSTATE_NOT_INTERNED;
5245 }
5246 fprintf(stderr, "total size of all interned strings: "
5247 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
5248 "mortal/immortal\n", mortal_size, immortal_size);
5249 Py_DECREF(keys);
5250 PyDict_Clear(interned);
5251 Py_DECREF(interned);
5252 interned = NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00005253}