blob: ef4f8c1e19e4dfddea5d515b83f1cd1167905936 [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000036 string containing exactly `size' bytes.
37
Martin Panter200a6152016-05-30 04:04:50 +000038 For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000039 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000040 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000041 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000042 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000044 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
Eli Bendersky72de2052011-03-24 22:38:25 +020051 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000053 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000055*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
Martin Panter646b5282016-06-21 23:58:05 +000091 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
Antoine Pitrouc83ea132010-05-09 14:46:46 +000092 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
Martin Panter646b5282016-06-21 23:58:05 +0000146 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300452 v = _PyCodec_DecodeText(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300532 v = _PyCodec_EncodeText(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
615 Py_ssize_t newlen = recode_encoding ? 4*len:len;
616 v = PyString_FromStringAndSize((char *)NULL, newlen);
617 if (v == NULL)
618 return NULL;
619 p = buf = PyString_AsString(v);
620 end = s + len;
621 while (s < end) {
622 if (*s != '\\') {
623 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000624#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000625 if (recode_encoding && (*s & 0x80)) {
626 PyObject *u, *w;
627 char *r;
628 const char* t;
629 Py_ssize_t rn;
630 t = s;
631 /* Decode non-ASCII bytes as UTF-8. */
632 while (t < end && (*t & 0x80)) t++;
633 u = PyUnicode_DecodeUTF8(s, t - s, errors);
634 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000635
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000636 /* Recode them in target encoding. */
637 w = PyUnicode_AsEncodedString(
638 u, recode_encoding, errors);
639 Py_DECREF(u);
640 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Append bytes to output buffer. */
643 assert(PyString_Check(w));
644 r = PyString_AS_STRING(w);
645 rn = PyString_GET_SIZE(w);
646 Py_MEMCPY(p, r, rn);
647 p += rn;
648 Py_DECREF(w);
649 s = t;
650 } else {
651 *p++ = *s++;
652 }
Christian Heimes44720832008-05-26 13:01:01 +0000653#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000654 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000655#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 continue;
657 }
658 s++;
659 if (s==end) {
660 PyErr_SetString(PyExc_ValueError,
661 "Trailing \\ in string");
662 goto failed;
663 }
664 switch (*s++) {
665 /* XXX This assumes ASCII! */
666 case '\n': break;
667 case '\\': *p++ = '\\'; break;
668 case '\'': *p++ = '\''; break;
669 case '\"': *p++ = '\"'; break;
670 case 'b': *p++ = '\b'; break;
671 case 'f': *p++ = '\014'; break; /* FF */
672 case 't': *p++ = '\t'; break;
673 case 'n': *p++ = '\n'; break;
674 case 'r': *p++ = '\r'; break;
675 case 'v': *p++ = '\013'; break; /* VT */
676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677 case '0': case '1': case '2': case '3':
678 case '4': case '5': case '6': case '7':
679 c = s[-1] - '0';
680 if (s < end && '0' <= *s && *s <= '7') {
681 c = (c<<3) + *s++ - '0';
682 if (s < end && '0' <= *s && *s <= '7')
683 c = (c<<3) + *s++ - '0';
684 }
685 *p++ = c;
686 break;
687 case 'x':
688 if (s+1 < end &&
689 isxdigit(Py_CHARMASK(s[0])) &&
690 isxdigit(Py_CHARMASK(s[1])))
691 {
692 unsigned int x = 0;
693 c = Py_CHARMASK(*s);
694 s++;
695 if (isdigit(c))
696 x = c - '0';
697 else if (islower(c))
698 x = 10 + c - 'a';
699 else
700 x = 10 + c - 'A';
701 x = x << 4;
702 c = Py_CHARMASK(*s);
703 s++;
704 if (isdigit(c))
705 x += c - '0';
706 else if (islower(c))
707 x += 10 + c - 'a';
708 else
709 x += 10 + c - 'A';
710 *p++ = x;
711 break;
712 }
713 if (!errors || strcmp(errors, "strict") == 0) {
714 PyErr_SetString(PyExc_ValueError,
715 "invalid \\x escape");
716 goto failed;
717 }
718 if (strcmp(errors, "replace") == 0) {
719 *p++ = '?';
720 } else if (strcmp(errors, "ignore") == 0)
721 /* do nothing */;
722 else {
723 PyErr_Format(PyExc_ValueError,
724 "decoding error; "
725 "unknown error handling code: %.400s",
726 errors);
727 goto failed;
728 }
Serhiy Storchaka01b3a082013-01-25 23:30:50 +0200729 /* skip \x */
730 if (s < end && isxdigit(Py_CHARMASK(s[0])))
731 s++; /* and a hexdigit */
732 break;
Christian Heimes44720832008-05-26 13:01:01 +0000733#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000734 case 'u':
735 case 'U':
736 case 'N':
737 if (unicode) {
738 PyErr_SetString(PyExc_ValueError,
739 "Unicode escapes not legal "
740 "when Unicode disabled");
741 goto failed;
742 }
Christian Heimes44720832008-05-26 13:01:01 +0000743#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000744 default:
745 *p++ = '\\';
746 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200747 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000748 UTF-8 bytes may follow. */
749 }
750 }
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000751 if (p-buf < newlen)
752 _PyString_Resize(&v, p - buf); /* v is cleared on error */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000753 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000754 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000755 Py_DECREF(v);
756 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000757}
758
759/* -------------------------------------------------------------------- */
760/* object api */
761
Christian Heimes1a6387e2008-03-26 12:49:49 +0000762static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000763string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000764{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000765 char *s;
766 Py_ssize_t len;
767 if (PyString_AsStringAndSize(op, &s, &len))
768 return -1;
769 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770}
771
Christian Heimes44720832008-05-26 13:01:01 +0000772static /*const*/ char *
773string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000774{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000775 char *s;
776 Py_ssize_t len;
777 if (PyString_AsStringAndSize(op, &s, &len))
778 return NULL;
779 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780}
781
782Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000783PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000784{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000785 if (!PyString_Check(op))
786 return string_getsize(op);
787 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000788}
789
Christian Heimes44720832008-05-26 13:01:01 +0000790/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000791PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000792{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000793 if (!PyString_Check(op))
794 return string_getbuffer(op);
795 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000796}
797
798int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000799PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000800 register char **s,
801 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000802{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000803 if (s == NULL) {
804 PyErr_BadInternalCall();
805 return -1;
806 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000807
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000808 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000809#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000810 if (PyUnicode_Check(obj)) {
811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812 if (obj == NULL)
813 return -1;
814 }
815 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000816#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000817 {
818 PyErr_Format(PyExc_TypeError,
819 "expected string or Unicode object, "
820 "%.200s found", Py_TYPE(obj)->tp_name);
821 return -1;
822 }
823 }
Christian Heimes44720832008-05-26 13:01:01 +0000824
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000825 *s = PyString_AS_STRING(obj);
826 if (len != NULL)
827 *len = PyString_GET_SIZE(obj);
828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829 PyErr_SetString(PyExc_TypeError,
830 "expected string without null bytes");
831 return -1;
832 }
833 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000834}
835
Christian Heimes1a6387e2008-03-26 12:49:49 +0000836/* -------------------------------------------------------------------- */
837/* Methods */
838
Christian Heimes44720832008-05-26 13:01:01 +0000839#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000840#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000841
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842#include "stringlib/count.h"
843#include "stringlib/find.h"
844#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000845#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000847#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000848#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000849
Christian Heimes1a6387e2008-03-26 12:49:49 +0000850
851
852static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000853string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000854{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 Py_ssize_t i, str_len;
856 char c;
857 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000858
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000859 /* XXX Ought to check for interrupts when writing long strings */
860 if (! PyString_CheckExact(op)) {
861 int ret;
862 /* A str subclass may have its own __str__ method. */
863 op = (PyStringObject *) PyObject_Str((PyObject *)op);
864 if (op == NULL)
865 return -1;
866 ret = string_print(op, fp, flags);
867 Py_DECREF(op);
868 return ret;
869 }
870 if (flags & Py_PRINT_RAW) {
871 char *data = op->ob_sval;
872 Py_ssize_t size = Py_SIZE(op);
873 Py_BEGIN_ALLOW_THREADS
874 while (size > INT_MAX) {
875 /* Very long strings cannot be written atomically.
876 * But don't write exactly INT_MAX bytes at a time
877 * to avoid memory aligment issues.
878 */
879 const int chunk_size = INT_MAX & ~0x3FFF;
880 fwrite(data, 1, chunk_size, fp);
881 data += chunk_size;
882 size -= chunk_size;
883 }
Christian Heimes44720832008-05-26 13:01:01 +0000884#ifdef __VMS
Ronald Oussoren3687e802013-07-11 13:33:55 +0200885 if (size) fwrite(data, (size_t)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000886#else
Ronald Oussoren3687e802013-07-11 13:33:55 +0200887 fwrite(data, 1, (size_t)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000888#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 Py_END_ALLOW_THREADS
890 return 0;
891 }
Christian Heimes44720832008-05-26 13:01:01 +0000892
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000893 /* figure out which quote to use; single is preferred */
894 quote = '\'';
895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896 !memchr(op->ob_sval, '"', Py_SIZE(op)))
897 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000898
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000899 str_len = Py_SIZE(op);
900 Py_BEGIN_ALLOW_THREADS
901 fputc(quote, fp);
902 for (i = 0; i < str_len; i++) {
903 /* Since strings are immutable and the caller should have a
Martin Panter3d36f0f2016-07-28 02:37:04 +0000904 reference, accessing the internal buffer should not be an issue
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000905 with the GIL released. */
906 c = op->ob_sval[i];
907 if (c == quote || c == '\\')
908 fprintf(fp, "\\%c", c);
909 else if (c == '\t')
910 fprintf(fp, "\\t");
911 else if (c == '\n')
912 fprintf(fp, "\\n");
913 else if (c == '\r')
914 fprintf(fp, "\\r");
915 else if (c < ' ' || c >= 0x7f)
916 fprintf(fp, "\\x%02x", c & 0xff);
917 else
918 fputc(c, fp);
919 }
920 fputc(quote, fp);
921 Py_END_ALLOW_THREADS
922 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000923}
924
Christian Heimes44720832008-05-26 13:01:01 +0000925PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000926PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000927{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 register PyStringObject* op = (PyStringObject*) obj;
Benjamin Petersonf8c4b3a2014-09-29 19:01:18 -0400929 size_t newsize;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000930 PyObject *v;
Benjamin Petersonf8c4b3a2014-09-29 19:01:18 -0400931 if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000932 PyErr_SetString(PyExc_OverflowError,
933 "string is too large to make repr");
934 return NULL;
935 }
Benjamin Petersonf8c4b3a2014-09-29 19:01:18 -0400936 newsize = 2 + 4*Py_SIZE(op);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000937 v = PyString_FromStringAndSize((char *)NULL, newsize);
938 if (v == NULL) {
939 return NULL;
940 }
941 else {
942 register Py_ssize_t i;
943 register char c;
944 register char *p;
945 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000946
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000947 /* figure out which quote to use; single is preferred */
948 quote = '\'';
949 if (smartquotes &&
950 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
951 !memchr(op->ob_sval, '"', Py_SIZE(op)))
952 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000953
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000954 p = PyString_AS_STRING(v);
955 *p++ = quote;
956 for (i = 0; i < Py_SIZE(op); i++) {
957 /* There's at least enough room for a hex escape
958 and a closing quote. */
959 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
960 c = op->ob_sval[i];
961 if (c == quote || c == '\\')
962 *p++ = '\\', *p++ = c;
963 else if (c == '\t')
964 *p++ = '\\', *p++ = 't';
965 else if (c == '\n')
966 *p++ = '\\', *p++ = 'n';
967 else if (c == '\r')
968 *p++ = '\\', *p++ = 'r';
969 else if (c < ' ' || c >= 0x7f) {
970 /* For performance, we don't want to call
971 PyOS_snprintf here (extra layers of
972 function call). */
973 sprintf(p, "\\x%02x", c & 0xff);
974 p += 4;
975 }
976 else
977 *p++ = c;
978 }
979 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
980 *p++ = quote;
981 *p = '\0';
982 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
983 return NULL;
984 return v;
985 }
Christian Heimes44720832008-05-26 13:01:01 +0000986}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000987
988static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000989string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000990{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000991 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000992}
993
Christian Heimes1a6387e2008-03-26 12:49:49 +0000994static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000995string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000996{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000997 assert(PyString_Check(s));
998 if (PyString_CheckExact(s)) {
999 Py_INCREF(s);
1000 return s;
1001 }
1002 else {
1003 /* Subtype -- return genuine string with the same value. */
1004 PyStringObject *t = (PyStringObject *) s;
1005 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1006 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001007}
1008
Christian Heimes44720832008-05-26 13:01:01 +00001009static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001010string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001011{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001012 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001013}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001014
Christian Heimes44720832008-05-26 13:01:01 +00001015static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001016string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001017{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001018 register Py_ssize_t size;
1019 register PyStringObject *op;
1020 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001021#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001022 if (PyUnicode_Check(bb))
1023 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001024#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001025 if (PyByteArray_Check(bb))
1026 return PyByteArray_Concat((PyObject *)a, bb);
1027 PyErr_Format(PyExc_TypeError,
1028 "cannot concatenate 'str' and '%.200s' objects",
1029 Py_TYPE(bb)->tp_name);
1030 return NULL;
1031 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001032#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001033 /* Optimize cases with empty left or right operand */
1034 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1035 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1036 if (Py_SIZE(a) == 0) {
1037 Py_INCREF(bb);
1038 return bb;
1039 }
1040 Py_INCREF(a);
1041 return (PyObject *)a;
1042 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001043 /* Check that string sizes are not negative, to prevent an
1044 overflow in cases where we are passed incorrectly-created
1045 strings with negative lengths (due to a bug in other code).
1046 */
1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049 PyErr_SetString(PyExc_OverflowError,
1050 "strings are too large to concat");
1051 return NULL;
1052 }
Serhiy Storchaka373773d2016-07-12 15:46:57 +03001053 size = Py_SIZE(a) + Py_SIZE(b);
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001054
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001055 /* Inline PyObject_NewVar */
1056 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1057 PyErr_SetString(PyExc_OverflowError,
1058 "strings are too large to concat");
1059 return NULL;
1060 }
1061 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1062 if (op == NULL)
1063 return PyErr_NoMemory();
Martin Panter646b5282016-06-21 23:58:05 +00001064 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001065 op->ob_shash = -1;
1066 op->ob_sstate = SSTATE_NOT_INTERNED;
1067 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1068 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1069 op->ob_sval[size] = '\0';
1070 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001071#undef b
1072}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001073
Christian Heimes44720832008-05-26 13:01:01 +00001074static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001075string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001076{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001077 register Py_ssize_t i;
1078 register Py_ssize_t j;
1079 register Py_ssize_t size;
1080 register PyStringObject *op;
1081 size_t nbytes;
1082 if (n < 0)
1083 n = 0;
Serhiy Storchaka373773d2016-07-12 15:46:57 +03001084 /* watch out for overflows: the size can overflow Py_ssize_t,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001085 * and the # of bytes needed can overflow size_t
1086 */
Serhiy Storchaka373773d2016-07-12 15:46:57 +03001087 if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001088 PyErr_SetString(PyExc_OverflowError,
1089 "repeated string is too long");
1090 return NULL;
1091 }
Serhiy Storchaka373773d2016-07-12 15:46:57 +03001092 size = Py_SIZE(a) * n;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001093 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1094 Py_INCREF(a);
1095 return (PyObject *)a;
1096 }
1097 nbytes = (size_t)size;
1098 if (nbytes + PyStringObject_SIZE <= nbytes) {
1099 PyErr_SetString(PyExc_OverflowError,
1100 "repeated string is too long");
1101 return NULL;
1102 }
1103 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1104 if (op == NULL)
1105 return PyErr_NoMemory();
Martin Panter646b5282016-06-21 23:58:05 +00001106 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001107 op->ob_shash = -1;
1108 op->ob_sstate = SSTATE_NOT_INTERNED;
1109 op->ob_sval[size] = '\0';
1110 if (Py_SIZE(a) == 1 && n > 0) {
1111 memset(op->ob_sval, a->ob_sval[0] , n);
1112 return (PyObject *) op;
1113 }
1114 i = 0;
1115 if (i < size) {
1116 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1117 i = Py_SIZE(a);
1118 }
1119 while (i < size) {
1120 j = (i <= size-i) ? i : size-i;
1121 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1122 i += j;
1123 }
1124 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001125}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001126
Christian Heimes44720832008-05-26 13:01:01 +00001127/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1128
1129static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001130string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001131 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001132 /* j -- may be negative! */
1133{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001134 if (i < 0)
1135 i = 0;
1136 if (j < 0)
1137 j = 0; /* Avoid signed/unsigned bug in next line */
1138 if (j > Py_SIZE(a))
1139 j = Py_SIZE(a);
1140 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1141 /* It's the same as a */
1142 Py_INCREF(a);
1143 return (PyObject *)a;
1144 }
1145 if (j < i)
1146 j = i;
1147 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001148}
1149
1150static int
1151string_contains(PyObject *str_obj, PyObject *sub_obj)
1152{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001153 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001154#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001155 if (PyUnicode_Check(sub_obj))
1156 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001157#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001158 if (!PyString_Check(sub_obj)) {
1159 PyErr_Format(PyExc_TypeError,
1160 "'in <string>' requires string as left operand, "
1161 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1162 return -1;
1163 }
1164 }
Christian Heimes44720832008-05-26 13:01:01 +00001165
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001166 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001167}
1168
1169static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001170string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001171{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001172 char pchar;
1173 PyObject *v;
1174 if (i < 0 || i >= Py_SIZE(a)) {
1175 PyErr_SetString(PyExc_IndexError, "string index out of range");
1176 return NULL;
1177 }
1178 pchar = a->ob_sval[i];
1179 v = (PyObject *)characters[pchar & UCHAR_MAX];
1180 if (v == NULL)
1181 v = PyString_FromStringAndSize(&pchar, 1);
1182 else {
Christian Heimes44720832008-05-26 13:01:01 +00001183#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001184 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001185#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001186 Py_INCREF(v);
1187 }
1188 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001189}
1190
1191static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001192string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001193{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001194 int c;
1195 Py_ssize_t len_a, len_b;
1196 Py_ssize_t min_len;
1197 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001198
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001199 /* Make sure both arguments are strings. */
1200 if (!(PyString_Check(a) && PyString_Check(b))) {
1201 result = Py_NotImplemented;
1202 goto out;
1203 }
1204 if (a == b) {
1205 switch (op) {
1206 case Py_EQ:case Py_LE:case Py_GE:
1207 result = Py_True;
1208 goto out;
1209 case Py_NE:case Py_LT:case Py_GT:
1210 result = Py_False;
1211 goto out;
1212 }
1213 }
1214 if (op == Py_EQ) {
1215 /* Supporting Py_NE here as well does not save
1216 much time, since Py_NE is rarely used. */
1217 if (Py_SIZE(a) == Py_SIZE(b)
1218 && (a->ob_sval[0] == b->ob_sval[0]
1219 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1220 result = Py_True;
1221 } else {
1222 result = Py_False;
1223 }
1224 goto out;
1225 }
1226 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1227 min_len = (len_a < len_b) ? len_a : len_b;
1228 if (min_len > 0) {
1229 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1230 if (c==0)
1231 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1232 } else
1233 c = 0;
1234 if (c == 0)
1235 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1236 switch (op) {
1237 case Py_LT: c = c < 0; break;
1238 case Py_LE: c = c <= 0; break;
1239 case Py_EQ: assert(0); break; /* unreachable */
1240 case Py_NE: c = c != 0; break;
1241 case Py_GT: c = c > 0; break;
1242 case Py_GE: c = c >= 0; break;
1243 default:
1244 result = Py_NotImplemented;
1245 goto out;
1246 }
1247 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001248 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001249 Py_INCREF(result);
1250 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001251}
1252
1253int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001254_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001255{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001256 PyStringObject *a = (PyStringObject*) o1;
1257 PyStringObject *b = (PyStringObject*) o2;
1258 return Py_SIZE(a) == Py_SIZE(b)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001259 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001260}
1261
1262static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001263string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001264{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 register Py_ssize_t len;
1266 register unsigned char *p;
1267 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001268
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001269#ifdef Py_DEBUG
Benjamin Peterson26da9202012-02-21 11:08:50 -05001270 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001271#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001272 if (a->ob_shash != -1)
1273 return a->ob_shash;
1274 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001275 /*
1276 We make the hash of the empty string be 0, rather than using
1277 (prefix ^ suffix), since this slightly obfuscates the hash secret
1278 */
1279 if (len == 0) {
1280 a->ob_shash = 0;
1281 return 0;
1282 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001283 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001284 x = _Py_HashSecret.prefix;
1285 x ^= *p << 7;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001286 while (--len >= 0)
1287 x = (1000003*x) ^ *p++;
1288 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001289 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001290 if (x == -1)
1291 x = -2;
1292 a->ob_shash = x;
1293 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001294}
1295
1296static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001297string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001298{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001299 if (PyIndex_Check(item)) {
1300 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1301 if (i == -1 && PyErr_Occurred())
1302 return NULL;
1303 if (i < 0)
1304 i += PyString_GET_SIZE(self);
1305 return string_item(self, i);
1306 }
1307 else if (PySlice_Check(item)) {
1308 Py_ssize_t start, stop, step, slicelength, cur, i;
1309 char* source_buf;
1310 char* result_buf;
1311 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001312
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001313 if (_PySlice_Unpack((PySliceObject *)item, &start, &stop, &step) < 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001314 return NULL;
1315 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001316 slicelength = _PySlice_AdjustIndices(PyString_GET_SIZE(self), &start,
1317 &stop, step);
Christian Heimes44720832008-05-26 13:01:01 +00001318
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001319 if (slicelength <= 0) {
1320 return PyString_FromStringAndSize("", 0);
1321 }
1322 else if (start == 0 && step == 1 &&
1323 slicelength == PyString_GET_SIZE(self) &&
1324 PyString_CheckExact(self)) {
1325 Py_INCREF(self);
1326 return (PyObject *)self;
1327 }
1328 else if (step == 1) {
1329 return PyString_FromStringAndSize(
1330 PyString_AS_STRING(self) + start,
1331 slicelength);
1332 }
1333 else {
1334 source_buf = PyString_AsString((PyObject*)self);
1335 result_buf = (char *)PyMem_Malloc(slicelength);
1336 if (result_buf == NULL)
1337 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001338
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001339 for (cur = start, i = 0; i < slicelength;
1340 cur += step, i++) {
1341 result_buf[i] = source_buf[cur];
1342 }
Christian Heimes44720832008-05-26 13:01:01 +00001343
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001344 result = PyString_FromStringAndSize(result_buf,
1345 slicelength);
1346 PyMem_Free(result_buf);
1347 return result;
1348 }
1349 }
1350 else {
1351 PyErr_Format(PyExc_TypeError,
1352 "string indices must be integers, not %.200s",
1353 Py_TYPE(item)->tp_name);
1354 return NULL;
1355 }
Christian Heimes44720832008-05-26 13:01:01 +00001356}
1357
1358static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001359string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001360{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001361 if ( index != 0 ) {
1362 PyErr_SetString(PyExc_SystemError,
1363 "accessing non-existent string segment");
1364 return -1;
1365 }
1366 *ptr = (void *)self->ob_sval;
1367 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001368}
1369
1370static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001371string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001372{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001373 PyErr_SetString(PyExc_TypeError,
1374 "Cannot use string as modifiable buffer");
1375 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001376}
1377
1378static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001379string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001380{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001381 if ( lenp )
1382 *lenp = Py_SIZE(self);
1383 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001384}
1385
1386static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001387string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001388{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001389 if ( index != 0 ) {
1390 PyErr_SetString(PyExc_SystemError,
1391 "accessing non-existent string segment");
1392 return -1;
1393 }
1394 *ptr = self->ob_sval;
1395 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001396}
1397
1398static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001399string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001400{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001401 return PyBuffer_FillInfo(view, (PyObject*)self,
1402 (void *)self->ob_sval, Py_SIZE(self),
1403 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001404}
1405
1406static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001407 (lenfunc)string_length, /*sq_length*/
1408 (binaryfunc)string_concat, /*sq_concat*/
1409 (ssizeargfunc)string_repeat, /*sq_repeat*/
1410 (ssizeargfunc)string_item, /*sq_item*/
1411 (ssizessizeargfunc)string_slice, /*sq_slice*/
1412 0, /*sq_ass_item*/
1413 0, /*sq_ass_slice*/
1414 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001415};
1416
1417static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001418 (lenfunc)string_length,
1419 (binaryfunc)string_subscript,
1420 0,
Christian Heimes44720832008-05-26 13:01:01 +00001421};
1422
1423static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001424 (readbufferproc)string_buffer_getreadbuf,
1425 (writebufferproc)string_buffer_getwritebuf,
1426 (segcountproc)string_buffer_getsegcount,
1427 (charbufferproc)string_buffer_getcharbuf,
1428 (getbufferproc)string_buffer_getbuffer,
1429 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001430};
1431
1432
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001433
/* Strip modes; each value doubles as an index into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-format strings, one per strip mode above. */
static const char *stripformat[] = {
    "|O:lstrip",
    "|O:rstrip",
    "|O:strip"
};

/* Method name for strip mode i: the format string minus its three-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1442
Christian Heimes1a6387e2008-03-26 12:49:49 +00001443PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001444"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001445\n\
Christian Heimes44720832008-05-26 13:01:01 +00001446Return a list of the words in the string S, using sep as the\n\
1447delimiter string. If maxsplit is given, at most maxsplit\n\
1448splits are done. If sep is not specified or is None, any\n\
1449whitespace string is a separator and empty strings are removed\n\
1450from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001451
1452static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001453string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001454{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001455 Py_ssize_t len = PyString_GET_SIZE(self), n;
1456 Py_ssize_t maxsplit = -1;
1457 const char *s = PyString_AS_STRING(self), *sub;
1458 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001459
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001460 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1461 return NULL;
1462 if (maxsplit < 0)
1463 maxsplit = PY_SSIZE_T_MAX;
1464 if (subobj == Py_None)
1465 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1466 if (PyString_Check(subobj)) {
1467 sub = PyString_AS_STRING(subobj);
1468 n = PyString_GET_SIZE(subobj);
1469 }
Christian Heimes44720832008-05-26 13:01:01 +00001470#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001471 else if (PyUnicode_Check(subobj))
1472 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001473#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001474 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1475 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001476
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001477 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001478}
1479
1480PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001481"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001482\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001483Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001484the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001485found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001486
1487static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001488string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001489{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001490 const char *sep;
1491 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001492
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001493 if (PyString_Check(sep_obj)) {
1494 sep = PyString_AS_STRING(sep_obj);
1495 sep_len = PyString_GET_SIZE(sep_obj);
1496 }
Christian Heimes44720832008-05-26 13:01:01 +00001497#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001498 else if (PyUnicode_Check(sep_obj))
1499 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001500#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001501 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1502 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001503
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001504 return stringlib_partition(
1505 (PyObject*) self,
1506 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1507 sep_obj, sep, sep_len
1508 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001509}
1510
1511PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001512"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001513\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001514Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001515the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001516separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001517
1518static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001519string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001520{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001521 const char *sep;
1522 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001524 if (PyString_Check(sep_obj)) {
1525 sep = PyString_AS_STRING(sep_obj);
1526 sep_len = PyString_GET_SIZE(sep_obj);
1527 }
Christian Heimes44720832008-05-26 13:01:01 +00001528#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001529 else if (PyUnicode_Check(sep_obj))
1530 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001531#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001532 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1533 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001534
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001535 return stringlib_rpartition(
1536 (PyObject*) self,
1537 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1538 sep_obj, sep, sep_len
1539 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001540}
1541
Christian Heimes1a6387e2008-03-26 12:49:49 +00001542PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001543"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001544\n\
Christian Heimes44720832008-05-26 13:01:01 +00001545Return a list of the words in the string S, using sep as the\n\
1546delimiter string, starting at the end of the string and working\n\
1547to the front. If maxsplit is given, at most maxsplit splits are\n\
1548done. If sep is not specified or is None, any whitespace string\n\
1549is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001550
1551static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001552string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001553{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001554 Py_ssize_t len = PyString_GET_SIZE(self), n;
1555 Py_ssize_t maxsplit = -1;
1556 const char *s = PyString_AS_STRING(self), *sub;
1557 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001558
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001559 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1560 return NULL;
1561 if (maxsplit < 0)
1562 maxsplit = PY_SSIZE_T_MAX;
1563 if (subobj == Py_None)
1564 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1565 if (PyString_Check(subobj)) {
1566 sub = PyString_AS_STRING(subobj);
1567 n = PyString_GET_SIZE(subobj);
1568 }
Christian Heimes44720832008-05-26 13:01:01 +00001569#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001570 else if (PyUnicode_Check(subobj))
1571 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001572#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001573 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1574 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001575
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001576 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001577}
1578
1579
1580PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001581"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001582\n\
1583Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001584iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001585
1586static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001587string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001588{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001589 char *sep = PyString_AS_STRING(self);
1590 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1591 PyObject *res = NULL;
1592 char *p;
1593 Py_ssize_t seqlen = 0;
1594 size_t sz = 0;
1595 Py_ssize_t i;
1596 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001597
Benjamin Peterson1643d5c2014-09-28 12:48:46 -04001598 seq = PySequence_Fast(orig, "can only join an iterable");
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001599 if (seq == NULL) {
1600 return NULL;
1601 }
Christian Heimes44720832008-05-26 13:01:01 +00001602
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001603 seqlen = PySequence_Size(seq);
1604 if (seqlen == 0) {
1605 Py_DECREF(seq);
1606 return PyString_FromString("");
1607 }
1608 if (seqlen == 1) {
1609 item = PySequence_Fast_GET_ITEM(seq, 0);
1610 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1611 Py_INCREF(item);
1612 Py_DECREF(seq);
1613 return item;
1614 }
1615 }
Christian Heimes44720832008-05-26 13:01:01 +00001616
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001617 /* There are at least two things to join, or else we have a subclass
1618 * of the builtin types in the sequence.
1619 * Do a pre-pass to figure out the total amount of space we'll
1620 * need (sz), see whether any argument is absurd, and defer to
1621 * the Unicode join if appropriate.
1622 */
1623 for (i = 0; i < seqlen; i++) {
1624 const size_t old_sz = sz;
1625 item = PySequence_Fast_GET_ITEM(seq, i);
1626 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001627#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001628 if (PyUnicode_Check(item)) {
1629 /* Defer to Unicode join.
Martin Panterb1d867f2016-05-26 05:28:50 +00001630 * CAUTION: There's no guarantee that the
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001631 * original sequence can be iterated over
1632 * again, so we must pass seq here.
1633 */
1634 PyObject *result;
1635 result = PyUnicode_Join((PyObject *)self, seq);
1636 Py_DECREF(seq);
1637 return result;
1638 }
Christian Heimes44720832008-05-26 13:01:01 +00001639#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001640 PyErr_Format(PyExc_TypeError,
1641 "sequence item %zd: expected string,"
1642 " %.80s found",
1643 i, Py_TYPE(item)->tp_name);
1644 Py_DECREF(seq);
1645 return NULL;
1646 }
1647 sz += PyString_GET_SIZE(item);
1648 if (i != 0)
1649 sz += seplen;
1650 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1651 PyErr_SetString(PyExc_OverflowError,
1652 "join() result is too long for a Python string");
1653 Py_DECREF(seq);
1654 return NULL;
1655 }
1656 }
Christian Heimes44720832008-05-26 13:01:01 +00001657
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001658 /* Allocate result space. */
1659 res = PyString_FromStringAndSize((char*)NULL, sz);
1660 if (res == NULL) {
1661 Py_DECREF(seq);
1662 return NULL;
1663 }
Christian Heimes44720832008-05-26 13:01:01 +00001664
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001665 /* Catenate everything. */
1666 p = PyString_AS_STRING(res);
1667 for (i = 0; i < seqlen; ++i) {
1668 size_t n;
1669 item = PySequence_Fast_GET_ITEM(seq, i);
1670 n = PyString_GET_SIZE(item);
1671 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1672 p += n;
1673 if (i < seqlen - 1) {
1674 Py_MEMCPY(p, sep, seplen);
1675 p += seplen;
1676 }
1677 }
Christian Heimes44720832008-05-26 13:01:01 +00001678
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001679 Py_DECREF(seq);
1680 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001681}
1682
1683PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001684_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001685{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001686 assert(sep != NULL && PyString_Check(sep));
1687 assert(x != NULL);
1688 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001689}
1690
/* helper macro to fixup start/end slice values
 *
 * `start` and `end` must be modifiable integer lvalues; `len` is the length
 * of the sequence being sliced.  After the macro:
 *   - an `end` greater than `len` is clipped to `len`;
 *   - a negative `end` or `start` has `len` added once and is then clamped
 *     to 0 if it is still negative.
 * A `start` greater than `len` is left untouched.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and is safe inside an unbraced if/else; invoke it with a
 * trailing semicolon.  `len` may be evaluated several times, so it must be
 * free of side effects.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001705
1706Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001707string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001708{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001709 PyObject *subobj;
1710 const char *sub;
1711 Py_ssize_t sub_len;
1712 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001713
Jesus Cea44e81682011-04-20 16:39:15 +02001714 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1715 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001716 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001717
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001718 if (PyString_Check(subobj)) {
1719 sub = PyString_AS_STRING(subobj);
1720 sub_len = PyString_GET_SIZE(subobj);
1721 }
Christian Heimes44720832008-05-26 13:01:01 +00001722#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001723 else if (PyUnicode_Check(subobj))
1724 return PyUnicode_Find(
1725 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001726#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001727 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1728 /* XXX - the "expected a character buffer object" is pretty
1729 confusing for a non-expert. remap to something else ? */
1730 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001731
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001732 if (dir > 0)
1733 return stringlib_find_slice(
1734 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735 sub, sub_len, start, end);
1736 else
1737 return stringlib_rfind_slice(
1738 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1739 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001740}
1741
1742
1743PyDoc_STRVAR(find__doc__,
1744"S.find(sub [,start [,end]]) -> int\n\
1745\n\
1746Return the lowest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001747such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001748arguments start and end are interpreted as in slice notation.\n\
1749\n\
1750Return -1 on failure.");
1751
1752static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001753string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001754{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001755 Py_ssize_t result = string_find_internal(self, args, +1);
1756 if (result == -2)
1757 return NULL;
1758 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001759}
1760
1761
1762PyDoc_STRVAR(index__doc__,
1763"S.index(sub [,start [,end]]) -> int\n\
1764\n\
1765Like S.find() but raise ValueError when the substring is not found.");
1766
1767static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001768string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001769{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001770 Py_ssize_t result = string_find_internal(self, args, +1);
1771 if (result == -2)
1772 return NULL;
1773 if (result == -1) {
1774 PyErr_SetString(PyExc_ValueError,
1775 "substring not found");
1776 return NULL;
1777 }
1778 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001779}
1780
1781
1782PyDoc_STRVAR(rfind__doc__,
1783"S.rfind(sub [,start [,end]]) -> int\n\
1784\n\
1785Return the highest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001786such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001787arguments start and end are interpreted as in slice notation.\n\
1788\n\
1789Return -1 on failure.");
1790
1791static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001792string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001793{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001794 Py_ssize_t result = string_find_internal(self, args, -1);
1795 if (result == -2)
1796 return NULL;
1797 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001798}
1799
1800
1801PyDoc_STRVAR(rindex__doc__,
1802"S.rindex(sub [,start [,end]]) -> int\n\
1803\n\
1804Like S.rfind() but raise ValueError when the substring is not found.");
1805
1806static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001807string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001808{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001809 Py_ssize_t result = string_find_internal(self, args, -1);
1810 if (result == -2)
1811 return NULL;
1812 if (result == -1) {
1813 PyErr_SetString(PyExc_ValueError,
1814 "substring not found");
1815 return NULL;
1816 }
1817 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001818}
1819
1820
1821Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001822do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001823{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001824 char *s = PyString_AS_STRING(self);
1825 Py_ssize_t len = PyString_GET_SIZE(self);
1826 char *sep = PyString_AS_STRING(sepobj);
1827 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1828 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001829
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001830 i = 0;
1831 if (striptype != RIGHTSTRIP) {
1832 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1833 i++;
1834 }
1835 }
Christian Heimes44720832008-05-26 13:01:01 +00001836
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001837 j = len;
1838 if (striptype != LEFTSTRIP) {
1839 do {
1840 j--;
1841 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1842 j++;
1843 }
Christian Heimes44720832008-05-26 13:01:01 +00001844
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001845 if (i == 0 && j == len && PyString_CheckExact(self)) {
1846 Py_INCREF(self);
1847 return (PyObject*)self;
1848 }
1849 else
1850 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001851}
1852
1853
1854Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001855do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001856{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001857 char *s = PyString_AS_STRING(self);
1858 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001859
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001860 i = 0;
1861 if (striptype != RIGHTSTRIP) {
1862 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1863 i++;
1864 }
1865 }
Christian Heimes44720832008-05-26 13:01:01 +00001866
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001867 j = len;
1868 if (striptype != LEFTSTRIP) {
1869 do {
1870 j--;
1871 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1872 j++;
1873 }
Christian Heimes44720832008-05-26 13:01:01 +00001874
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001875 if (i == 0 && j == len && PyString_CheckExact(self)) {
1876 Py_INCREF(self);
1877 return (PyObject*)self;
1878 }
1879 else
1880 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001881}
1882
1883
1884Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001885do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001886{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001887 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001888
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001889 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1890 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001891
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001892 if (sep != NULL && sep != Py_None) {
1893 if (PyString_Check(sep))
1894 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001895#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001896 else if (PyUnicode_Check(sep)) {
1897 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1898 PyObject *res;
1899 if (uniself==NULL)
1900 return NULL;
1901 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1902 striptype, sep);
1903 Py_DECREF(uniself);
1904 return res;
1905 }
Christian Heimes44720832008-05-26 13:01:01 +00001906#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001907 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001908#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001909 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001910#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001911 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001912#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001913 STRIPNAME(striptype));
1914 return NULL;
1915 }
Christian Heimes44720832008-05-26 13:01:01 +00001916
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001917 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001918}
1919
1920
1921PyDoc_STRVAR(strip__doc__,
1922"S.strip([chars]) -> string or unicode\n\
1923\n\
1924Return a copy of the string S with leading and trailing\n\
1925whitespace removed.\n\
1926If chars is given and not None, remove characters in chars instead.\n\
1927If chars is unicode, S will be converted to unicode before stripping");
1928
1929static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001930string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001931{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001932 if (PyTuple_GET_SIZE(args) == 0)
1933 return do_strip(self, BOTHSTRIP); /* Common case */
1934 else
1935 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001936}
1937
1938
1939PyDoc_STRVAR(lstrip__doc__,
1940"S.lstrip([chars]) -> string or unicode\n\
1941\n\
1942Return a copy of the string S with leading whitespace removed.\n\
1943If chars is given and not None, remove characters in chars instead.\n\
1944If chars is unicode, S will be converted to unicode before stripping");
1945
1946static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001947string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001948{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001949 if (PyTuple_GET_SIZE(args) == 0)
1950 return do_strip(self, LEFTSTRIP); /* Common case */
1951 else
1952 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001953}
1954
1955
1956PyDoc_STRVAR(rstrip__doc__,
1957"S.rstrip([chars]) -> string or unicode\n\
1958\n\
1959Return a copy of the string S with trailing whitespace removed.\n\
1960If chars is given and not None, remove characters in chars instead.\n\
1961If chars is unicode, S will be converted to unicode before stripping");
1962
1963static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001964string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001965{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001966 if (PyTuple_GET_SIZE(args) == 0)
1967 return do_strip(self, RIGHTSTRIP); /* Common case */
1968 else
1969 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001970}
1971
1972
1973PyDoc_STRVAR(lower__doc__,
1974"S.lower() -> string\n\
1975\n\
1976Return a copy of the string S converted to lowercase.");
1977
1978/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1979#ifndef _tolower
1980#define _tolower tolower
1981#endif
1982
1983static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001984string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001986 char *s;
1987 Py_ssize_t i, n = PyString_GET_SIZE(self);
1988 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001989
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001990 newobj = PyString_FromStringAndSize(NULL, n);
1991 if (!newobj)
1992 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001993
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001994 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00001995
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001996 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00001997
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001998 for (i = 0; i < n; i++) {
1999 int c = Py_CHARMASK(s[i]);
2000 if (isupper(c))
2001 s[i] = _tolower(c);
2002 }
Christian Heimes44720832008-05-26 13:01:01 +00002003
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002004 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002005}
2006
2007PyDoc_STRVAR(upper__doc__,
2008"S.upper() -> string\n\
2009\n\
2010Return a copy of the string S converted to uppercase.");
2011
2012#ifndef _toupper
2013#define _toupper toupper
2014#endif
2015
2016static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002017string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002018{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002019 char *s;
2020 Py_ssize_t i, n = PyString_GET_SIZE(self);
2021 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002022
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002023 newobj = PyString_FromStringAndSize(NULL, n);
2024 if (!newobj)
2025 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002026
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002027 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002028
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002029 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002030
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002031 for (i = 0; i < n; i++) {
2032 int c = Py_CHARMASK(s[i]);
2033 if (islower(c))
2034 s[i] = _toupper(c);
2035 }
Christian Heimes44720832008-05-26 13:01:01 +00002036
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002037 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002038}
2039
2040PyDoc_STRVAR(title__doc__,
2041"S.title() -> string\n\
2042\n\
2043Return a titlecased version of S, i.e. words start with uppercase\n\
2044characters, all remaining cased characters have lowercase.");
2045
2046static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002047string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002048{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002049 char *s = PyString_AS_STRING(self), *s_new;
2050 Py_ssize_t i, n = PyString_GET_SIZE(self);
2051 int previous_is_cased = 0;
2052 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002053
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002054 newobj = PyString_FromStringAndSize(NULL, n);
2055 if (newobj == NULL)
2056 return NULL;
2057 s_new = PyString_AsString(newobj);
2058 for (i = 0; i < n; i++) {
2059 int c = Py_CHARMASK(*s++);
2060 if (islower(c)) {
2061 if (!previous_is_cased)
2062 c = toupper(c);
2063 previous_is_cased = 1;
2064 } else if (isupper(c)) {
2065 if (previous_is_cased)
2066 c = tolower(c);
2067 previous_is_cased = 1;
2068 } else
2069 previous_is_cased = 0;
2070 *s_new++ = c;
2071 }
2072 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002073}
2074
2075PyDoc_STRVAR(capitalize__doc__,
2076"S.capitalize() -> string\n\
2077\n\
2078Return a copy of the string S with only its first character\n\
2079capitalized.");
2080
2081static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002082string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002083{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002084 char *s = PyString_AS_STRING(self), *s_new;
2085 Py_ssize_t i, n = PyString_GET_SIZE(self);
2086 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002087
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002088 newobj = PyString_FromStringAndSize(NULL, n);
2089 if (newobj == NULL)
2090 return NULL;
2091 s_new = PyString_AsString(newobj);
2092 if (0 < n) {
2093 int c = Py_CHARMASK(*s++);
2094 if (islower(c))
2095 *s_new = toupper(c);
2096 else
2097 *s_new = c;
2098 s_new++;
2099 }
2100 for (i = 1; i < n; i++) {
2101 int c = Py_CHARMASK(*s++);
2102 if (isupper(c))
2103 *s_new = tolower(c);
2104 else
2105 *s_new = c;
2106 s_new++;
2107 }
2108 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002109}
2110
2111
2112PyDoc_STRVAR(count__doc__,
2113"S.count(sub[, start[, end]]) -> int\n\
2114\n\
2115Return the number of non-overlapping occurrences of substring sub in\n\
2116string S[start:end]. Optional arguments start and end are interpreted\n\
2117as in slice notation.");
2118
2119static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002120string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002121{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002122 PyObject *sub_obj;
2123 const char *str = PyString_AS_STRING(self), *sub;
2124 Py_ssize_t sub_len;
2125 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002126
Jesus Cea44e81682011-04-20 16:39:15 +02002127 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002128 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002129
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002130 if (PyString_Check(sub_obj)) {
2131 sub = PyString_AS_STRING(sub_obj);
2132 sub_len = PyString_GET_SIZE(sub_obj);
2133 }
Christian Heimes44720832008-05-26 13:01:01 +00002134#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002135 else if (PyUnicode_Check(sub_obj)) {
2136 Py_ssize_t count;
2137 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2138 if (count == -1)
2139 return NULL;
2140 else
2141 return PyInt_FromSsize_t(count);
2142 }
Christian Heimes44720832008-05-26 13:01:01 +00002143#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002144 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2145 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002146
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002147 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002148
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002149 return PyInt_FromSsize_t(
2150 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2151 );
Christian Heimes44720832008-05-26 13:01:01 +00002152}
2153
2154PyDoc_STRVAR(swapcase__doc__,
2155"S.swapcase() -> string\n\
2156\n\
2157Return a copy of the string S with uppercase characters\n\
2158converted to lowercase and vice versa.");
2159
2160static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002161string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002162{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002163 char *s = PyString_AS_STRING(self), *s_new;
2164 Py_ssize_t i, n = PyString_GET_SIZE(self);
2165 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002166
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002167 newobj = PyString_FromStringAndSize(NULL, n);
2168 if (newobj == NULL)
2169 return NULL;
2170 s_new = PyString_AsString(newobj);
2171 for (i = 0; i < n; i++) {
2172 int c = Py_CHARMASK(*s++);
2173 if (islower(c)) {
2174 *s_new = toupper(c);
2175 }
2176 else if (isupper(c)) {
2177 *s_new = tolower(c);
2178 }
2179 else
2180 *s_new = c;
2181 s_new++;
2182 }
2183 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002184}
2185
2186
2187PyDoc_STRVAR(translate__doc__,
2188"S.translate(table [,deletechars]) -> string\n\
2189\n\
2190Return a copy of the string S, where all characters occurring\n\
2191in the optional argument deletechars are removed, and the\n\
2192remaining characters have been mapped through the given\n\
Mark Dickinsoncb9bf1a2011-06-25 11:00:12 +02002193translation table, which must be a string of length 256 or None.\n\
2194If the table argument is None, no translation is applied and\n\
2195the operation simply removes the characters in deletechars.");
Christian Heimes44720832008-05-26 13:01:01 +00002196
2197static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002198string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002199{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002200 register char *input, *output;
2201 const char *table;
2202 register Py_ssize_t i, c, changed = 0;
2203 PyObject *input_obj = (PyObject*)self;
2204 const char *output_start, *del_table=NULL;
2205 Py_ssize_t inlen, tablen, dellen = 0;
2206 PyObject *result;
2207 int trans_table[256];
2208 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002209
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002210 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2211 &tableobj, &delobj))
2212 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002213
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002214 if (PyString_Check(tableobj)) {
2215 table = PyString_AS_STRING(tableobj);
2216 tablen = PyString_GET_SIZE(tableobj);
2217 }
2218 else if (tableobj == Py_None) {
2219 table = NULL;
2220 tablen = 256;
2221 }
Christian Heimes44720832008-05-26 13:01:01 +00002222#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002223 else if (PyUnicode_Check(tableobj)) {
2224 /* Unicode .translate() does not support the deletechars
2225 parameter; instead a mapping to None will cause characters
2226 to be deleted. */
2227 if (delobj != NULL) {
2228 PyErr_SetString(PyExc_TypeError,
2229 "deletions are implemented differently for unicode");
2230 return NULL;
2231 }
2232 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2233 }
Christian Heimes44720832008-05-26 13:01:01 +00002234#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002235 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2236 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002237
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002238 if (tablen != 256) {
2239 PyErr_SetString(PyExc_ValueError,
2240 "translation table must be 256 characters long");
2241 return NULL;
2242 }
Christian Heimes44720832008-05-26 13:01:01 +00002243
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002244 if (delobj != NULL) {
2245 if (PyString_Check(delobj)) {
2246 del_table = PyString_AS_STRING(delobj);
2247 dellen = PyString_GET_SIZE(delobj);
2248 }
Christian Heimes44720832008-05-26 13:01:01 +00002249#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002250 else if (PyUnicode_Check(delobj)) {
2251 PyErr_SetString(PyExc_TypeError,
2252 "deletions are implemented differently for unicode");
2253 return NULL;
2254 }
Christian Heimes44720832008-05-26 13:01:01 +00002255#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002256 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2257 return NULL;
2258 }
2259 else {
2260 del_table = NULL;
2261 dellen = 0;
2262 }
Christian Heimes44720832008-05-26 13:01:01 +00002263
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002264 inlen = PyString_GET_SIZE(input_obj);
2265 result = PyString_FromStringAndSize((char *)NULL, inlen);
2266 if (result == NULL)
2267 return NULL;
2268 output_start = output = PyString_AsString(result);
2269 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002270
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002271 if (dellen == 0 && table != NULL) {
2272 /* If no deletions are required, use faster code */
2273 for (i = inlen; --i >= 0; ) {
2274 c = Py_CHARMASK(*input++);
2275 if (Py_CHARMASK((*output++ = table[c])) != c)
2276 changed = 1;
2277 }
2278 if (changed || !PyString_CheckExact(input_obj))
2279 return result;
2280 Py_DECREF(result);
2281 Py_INCREF(input_obj);
2282 return input_obj;
2283 }
Christian Heimes44720832008-05-26 13:01:01 +00002284
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002285 if (table == NULL) {
2286 for (i = 0; i < 256; i++)
2287 trans_table[i] = Py_CHARMASK(i);
2288 } else {
2289 for (i = 0; i < 256; i++)
2290 trans_table[i] = Py_CHARMASK(table[i]);
2291 }
Christian Heimes44720832008-05-26 13:01:01 +00002292
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002293 for (i = 0; i < dellen; i++)
2294 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002295
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002296 for (i = inlen; --i >= 0; ) {
2297 c = Py_CHARMASK(*input++);
2298 if (trans_table[c] != -1)
2299 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2300 continue;
2301 changed = 1;
2302 }
2303 if (!changed && PyString_CheckExact(input_obj)) {
2304 Py_DECREF(result);
2305 Py_INCREF(input_obj);
2306 return input_obj;
2307 }
2308 /* Fix the size of the resulting string */
2309 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2310 return NULL;
2311 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002312}
2313
2314
Christian Heimes44720832008-05-26 13:01:01 +00002315/* find and count characters and substrings */
2316
/* Search the first `target_len` bytes of `target` for the byte `c`.
   Evaluates to a `char *` pointing at the first match, or NULL when the
   byte does not occur.  Every macro argument is parenthesized so that a
   compound expression passed by a caller expands as a single operand. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2319
2320/* String ops must return a string. */
2321/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002322Py_LOCAL(PyStringObject *)
2323return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002324{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002325 if (PyString_CheckExact(self)) {
2326 Py_INCREF(self);
2327 return self;
2328 }
2329 return (PyStringObject *)PyString_FromStringAndSize(
2330 PyString_AS_STRING(self),
2331 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002332}
2333
2334Py_LOCAL_INLINE(Py_ssize_t)
Ronald Oussoren3687e802013-07-11 13:33:55 +02002335countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002336{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002337 Py_ssize_t count=0;
2338 const char *start=target;
2339 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002340
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002341 while ( (start=findchar(start, end-start, c)) != NULL ) {
2342 count++;
2343 if (count >= maxcount)
2344 break;
2345 start += 1;
2346 }
2347 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002348}
2349
Christian Heimes44720832008-05-26 13:01:01 +00002350
2351/* Algorithms for different cases of string replacement */
2352
2353/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002354Py_LOCAL(PyStringObject *)
2355replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002356 const char *to_s, Py_ssize_t to_len,
2357 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002358{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002359 char *self_s, *result_s;
2360 Py_ssize_t self_len, result_len;
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002361 Py_ssize_t count, i;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002362 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002363
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002364 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002365
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002366 /* 1 at the end plus 1 after every character;
2367 count = min(maxcount, self_len + 1) */
2368 if (maxcount <= self_len) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002369 count = maxcount;
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002370 }
2371 else {
2372 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2373 count = self_len + 1;
2374 }
Christian Heimes44720832008-05-26 13:01:01 +00002375
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002376 /* Check for overflow */
2377 /* result_len = count * to_len + self_len; */
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002378 assert(count > 0);
2379 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002380 PyErr_SetString(PyExc_OverflowError,
2381 "replace string is too long");
2382 return NULL;
2383 }
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002384 result_len = count * to_len + self_len;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002385 if (! (result = (PyStringObject *)
2386 PyString_FromStringAndSize(NULL, result_len)) )
2387 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002388
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002389 self_s = PyString_AS_STRING(self);
2390 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002391
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002392 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002393
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002394 /* Lay the first one down (guaranteed this will occur) */
2395 Py_MEMCPY(result_s, to_s, to_len);
2396 result_s += to_len;
2397 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002398
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002399 for (i=0; i<count; i++) {
2400 *result_s++ = *self_s++;
2401 Py_MEMCPY(result_s, to_s, to_len);
2402 result_s += to_len;
2403 }
2404
2405 /* Copy the rest of the original string */
2406 Py_MEMCPY(result_s, self_s, self_len-i);
2407
2408 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002409}
2410
2411/* Special case for deleting a single character */
2412/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002413Py_LOCAL(PyStringObject *)
2414replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002415 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002416{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002417 char *self_s, *result_s;
2418 char *start, *next, *end;
2419 Py_ssize_t self_len, result_len;
2420 Py_ssize_t count;
2421 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002422
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002423 self_len = PyString_GET_SIZE(self);
2424 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002425
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002426 count = countchar(self_s, self_len, from_c, maxcount);
2427 if (count == 0) {
2428 return return_self(self);
2429 }
Christian Heimes44720832008-05-26 13:01:01 +00002430
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002431 result_len = self_len - count; /* from_len == 1 */
2432 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002433
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002434 if ( (result = (PyStringObject *)
2435 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2436 return NULL;
2437 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002438
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002439 start = self_s;
2440 end = self_s + self_len;
2441 while (count-- > 0) {
2442 next = findchar(start, end-start, from_c);
2443 if (next == NULL)
2444 break;
2445 Py_MEMCPY(result_s, start, next-start);
2446 result_s += (next-start);
2447 start = next+1;
2448 }
2449 Py_MEMCPY(result_s, start, end-start);
2450
2451 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002452}
2453
2454/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2455
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002456Py_LOCAL(PyStringObject *)
2457replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002458 const char *from_s, Py_ssize_t from_len,
2459 Py_ssize_t maxcount) {
2460 char *self_s, *result_s;
2461 char *start, *next, *end;
2462 Py_ssize_t self_len, result_len;
2463 Py_ssize_t count, offset;
2464 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002465
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002466 self_len = PyString_GET_SIZE(self);
2467 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002468
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002469 count = stringlib_count(self_s, self_len,
2470 from_s, from_len,
2471 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002472
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002473 if (count == 0) {
2474 /* no matches */
2475 return return_self(self);
2476 }
Christian Heimes44720832008-05-26 13:01:01 +00002477
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002478 result_len = self_len - (count * from_len);
2479 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002480
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002481 if ( (result = (PyStringObject *)
2482 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2483 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002484
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002485 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002486
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002487 start = self_s;
2488 end = self_s + self_len;
2489 while (count-- > 0) {
2490 offset = stringlib_find(start, end-start,
2491 from_s, from_len,
2492 0);
2493 if (offset == -1)
2494 break;
2495 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002496
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002497 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002498
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002499 result_s += (next-start);
2500 start = next+from_len;
2501 }
2502 Py_MEMCPY(result_s, start, end-start);
2503 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002504}
2505
2506/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002507Py_LOCAL(PyStringObject *)
2508replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002509 char from_c, char to_c,
2510 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002511{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002512 char *self_s, *result_s, *start, *end, *next;
2513 Py_ssize_t self_len;
2514 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002515
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002516 /* The result string will be the same size */
2517 self_s = PyString_AS_STRING(self);
2518 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002519
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002520 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002521
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002522 if (next == NULL) {
2523 /* No matches; return the original string */
2524 return return_self(self);
2525 }
Christian Heimes44720832008-05-26 13:01:01 +00002526
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002527 /* Need to make a new string */
2528 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2529 if (result == NULL)
2530 return NULL;
2531 result_s = PyString_AS_STRING(result);
2532 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002533
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002534 /* change everything in-place, starting with this one */
2535 start = result_s + (next-self_s);
2536 *start = to_c;
2537 start++;
2538 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002539
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002540 while (--maxcount > 0) {
2541 next = findchar(start, end-start, from_c);
2542 if (next == NULL)
2543 break;
2544 *next = to_c;
2545 start = next+1;
2546 }
Christian Heimes44720832008-05-26 13:01:01 +00002547
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002548 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002549}
2550
2551/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002552Py_LOCAL(PyStringObject *)
2553replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002554 const char *from_s, Py_ssize_t from_len,
2555 const char *to_s, Py_ssize_t to_len,
2556 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002557{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002558 char *result_s, *start, *end;
2559 char *self_s;
2560 Py_ssize_t self_len, offset;
2561 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002562
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002563 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002564
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002565 self_s = PyString_AS_STRING(self);
2566 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002567
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002568 offset = stringlib_find(self_s, self_len,
2569 from_s, from_len,
2570 0);
2571 if (offset == -1) {
2572 /* No matches; return the original string */
2573 return return_self(self);
2574 }
Christian Heimes44720832008-05-26 13:01:01 +00002575
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002576 /* Need to make a new string */
2577 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2578 if (result == NULL)
2579 return NULL;
2580 result_s = PyString_AS_STRING(result);
2581 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002582
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002583 /* change everything in-place, starting with this one */
2584 start = result_s + offset;
2585 Py_MEMCPY(start, to_s, from_len);
2586 start += from_len;
2587 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002588
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002589 while ( --maxcount > 0) {
2590 offset = stringlib_find(start, end-start,
2591 from_s, from_len,
2592 0);
2593 if (offset==-1)
2594 break;
2595 Py_MEMCPY(start+offset, to_s, from_len);
2596 start += offset+from_len;
2597 }
Christian Heimes44720832008-05-26 13:01:01 +00002598
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002599 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002600}
2601
2602/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002603Py_LOCAL(PyStringObject *)
2604replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002605 char from_c,
2606 const char *to_s, Py_ssize_t to_len,
2607 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002608{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002609 char *self_s, *result_s;
2610 char *start, *next, *end;
2611 Py_ssize_t self_len, result_len;
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002612 Py_ssize_t count;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002613 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002614
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002615 self_s = PyString_AS_STRING(self);
2616 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002617
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002618 count = countchar(self_s, self_len, from_c, maxcount);
2619 if (count == 0) {
2620 /* no matches, return unchanged */
2621 return return_self(self);
2622 }
Christian Heimes44720832008-05-26 13:01:01 +00002623
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002624 /* use the difference between current and new, hence the "-1" */
2625 /* result_len = self_len + count * (to_len-1) */
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002626 assert(count > 0);
2627 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002628 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2629 return NULL;
2630 }
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002631 result_len = self_len + count * (to_len - 1);
Christian Heimes44720832008-05-26 13:01:01 +00002632
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002633 if ( (result = (PyStringObject *)
2634 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2635 return NULL;
2636 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002637
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002638 start = self_s;
2639 end = self_s + self_len;
2640 while (count-- > 0) {
2641 next = findchar(start, end-start, from_c);
2642 if (next == NULL)
2643 break;
Christian Heimes44720832008-05-26 13:01:01 +00002644
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002645 if (next == start) {
2646 /* replace with the 'to' */
2647 Py_MEMCPY(result_s, to_s, to_len);
2648 result_s += to_len;
2649 start += 1;
2650 } else {
2651 /* copy the unchanged old then the 'to' */
2652 Py_MEMCPY(result_s, start, next-start);
2653 result_s += (next-start);
2654 Py_MEMCPY(result_s, to_s, to_len);
2655 result_s += to_len;
2656 start = next+1;
2657 }
2658 }
2659 /* Copy the remainder of the remaining string */
2660 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002661
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002662 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002663}
2664
2665/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002666Py_LOCAL(PyStringObject *)
2667replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002668 const char *from_s, Py_ssize_t from_len,
2669 const char *to_s, Py_ssize_t to_len,
2670 Py_ssize_t maxcount) {
2671 char *self_s, *result_s;
2672 char *start, *next, *end;
2673 Py_ssize_t self_len, result_len;
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002674 Py_ssize_t count, offset;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002675 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002676
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002677 self_s = PyString_AS_STRING(self);
2678 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002679
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002680 count = stringlib_count(self_s, self_len,
2681 from_s, from_len,
2682 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002683
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002684 if (count == 0) {
2685 /* no matches, return unchanged */
2686 return return_self(self);
2687 }
Christian Heimes44720832008-05-26 13:01:01 +00002688
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002689 /* Check for overflow */
2690 /* result_len = self_len + count * (to_len-from_len) */
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002691 assert(count > 0);
2692 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002693 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2694 return NULL;
2695 }
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002696 result_len = self_len + count * (to_len - from_len);
Christian Heimes44720832008-05-26 13:01:01 +00002697
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002698 if ( (result = (PyStringObject *)
2699 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2700 return NULL;
2701 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002702
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002703 start = self_s;
2704 end = self_s + self_len;
2705 while (count-- > 0) {
2706 offset = stringlib_find(start, end-start,
2707 from_s, from_len,
2708 0);
2709 if (offset == -1)
2710 break;
2711 next = start+offset;
2712 if (next == start) {
2713 /* replace with the 'to' */
2714 Py_MEMCPY(result_s, to_s, to_len);
2715 result_s += to_len;
2716 start += from_len;
2717 } else {
2718 /* copy the unchanged old then the 'to' */
2719 Py_MEMCPY(result_s, start, next-start);
2720 result_s += (next-start);
2721 Py_MEMCPY(result_s, to_s, to_len);
2722 result_s += to_len;
2723 start = next+from_len;
2724 }
2725 }
2726 /* Copy the remainder of the remaining string */
2727 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002728
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002729 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002730}
2731
2732
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002733Py_LOCAL(PyStringObject *)
2734replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002735 const char *from_s, Py_ssize_t from_len,
2736 const char *to_s, Py_ssize_t to_len,
2737 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002738{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002739 if (maxcount < 0) {
2740 maxcount = PY_SSIZE_T_MAX;
2741 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2742 /* nothing to do; return the original string */
2743 return return_self(self);
2744 }
Christian Heimes44720832008-05-26 13:01:01 +00002745
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002746 if (maxcount == 0 ||
2747 (from_len == 0 && to_len == 0)) {
2748 /* nothing to do; return the original string */
2749 return return_self(self);
2750 }
Christian Heimes44720832008-05-26 13:01:01 +00002751
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002752 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002753
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002754 if (from_len == 0) {
2755 /* insert the 'to' string everywhere. */
2756 /* >>> "Python".replace("", ".") */
2757 /* '.P.y.t.h.o.n.' */
2758 return replace_interleave(self, to_s, to_len, maxcount);
2759 }
Christian Heimes44720832008-05-26 13:01:01 +00002760
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002761 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2762 /* point for an empty self string to generate a non-empty string */
2763 /* Special case so the remaining code always gets a non-empty string */
2764 if (PyString_GET_SIZE(self) == 0) {
2765 return return_self(self);
2766 }
Christian Heimes44720832008-05-26 13:01:01 +00002767
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002768 if (to_len == 0) {
Martin Panter440bbd02016-09-08 05:22:16 +00002769 /* delete all occurrences of 'from' string */
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002770 if (from_len == 1) {
2771 return replace_delete_single_character(
2772 self, from_s[0], maxcount);
2773 } else {
2774 return replace_delete_substring(self, from_s, from_len, maxcount);
2775 }
2776 }
Christian Heimes44720832008-05-26 13:01:01 +00002777
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002778 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002779
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002780 if (from_len == to_len) {
2781 if (from_len == 1) {
2782 return replace_single_character_in_place(
2783 self,
2784 from_s[0],
2785 to_s[0],
2786 maxcount);
2787 } else {
2788 return replace_substring_in_place(
2789 self, from_s, from_len, to_s, to_len, maxcount);
2790 }
2791 }
Christian Heimes44720832008-05-26 13:01:01 +00002792
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002793 /* Otherwise use the more generic algorithms */
2794 if (from_len == 1) {
2795 return replace_single_character(self, from_s[0],
2796 to_s, to_len, maxcount);
2797 } else {
2798 /* len('from')>=2, len('to')>=1 */
2799 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2800 }
Christian Heimes44720832008-05-26 13:01:01 +00002801}
2802
2803PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002804"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002805\n\
2806Return a copy of string S with all occurrences of substring\n\
2807old replaced by new. If the optional argument count is\n\
2808given, only the first count occurrences are replaced.");
2809
2810static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002811string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002812{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002813 Py_ssize_t count = -1;
2814 PyObject *from, *to;
2815 const char *from_s, *to_s;
2816 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002817
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002818 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2819 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002820
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002821 if (PyString_Check(from)) {
2822 from_s = PyString_AS_STRING(from);
2823 from_len = PyString_GET_SIZE(from);
2824 }
Christian Heimes44720832008-05-26 13:01:01 +00002825#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002826 if (PyUnicode_Check(from))
2827 return PyUnicode_Replace((PyObject *)self,
2828 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002829#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002830 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2831 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002832
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002833 if (PyString_Check(to)) {
2834 to_s = PyString_AS_STRING(to);
2835 to_len = PyString_GET_SIZE(to);
2836 }
Christian Heimes44720832008-05-26 13:01:01 +00002837#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002838 else if (PyUnicode_Check(to))
2839 return PyUnicode_Replace((PyObject *)self,
2840 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002841#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002842 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2843 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002844
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002845 return (PyObject *)replace((PyStringObject *) self,
2846 from_s, from_len,
2847 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002848}
2849
2850/** End DALKE **/
2851
2852/* Matches the end (direction >= 0) or start (direction < 0) of self
2853 * against substr, using the start and end arguments. Returns
2854 * -1 on error, 0 if not found and 1 if found.
2855 */
2856Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002857_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002858 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002859{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002860 Py_ssize_t len = PyString_GET_SIZE(self);
2861 Py_ssize_t slen;
2862 const char* sub;
2863 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002864
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002865 if (PyString_Check(substr)) {
2866 sub = PyString_AS_STRING(substr);
2867 slen = PyString_GET_SIZE(substr);
2868 }
Christian Heimes44720832008-05-26 13:01:01 +00002869#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002870 else if (PyUnicode_Check(substr))
2871 return PyUnicode_Tailmatch((PyObject *)self,
2872 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002873#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002874 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2875 return -1;
2876 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002877
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002878 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002879
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002880 if (direction < 0) {
2881 /* startswith */
2882 if (start+slen > len)
2883 return 0;
2884 } else {
2885 /* endswith */
2886 if (end-start < slen || start > len)
2887 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002888
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002889 if (end-slen > start)
2890 start = end - slen;
2891 }
2892 if (end-start >= slen)
2893 return ! memcmp(str+start, sub, slen);
2894 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002895}
2896
2897
2898PyDoc_STRVAR(startswith__doc__,
2899"S.startswith(prefix[, start[, end]]) -> bool\n\
2900\n\
2901Return True if S starts with the specified prefix, False otherwise.\n\
2902With optional start, test S beginning at that position.\n\
2903With optional end, stop comparing S at that position.\n\
2904prefix can also be a tuple of strings to try.");
2905
2906static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002907string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002908{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002909 Py_ssize_t start = 0;
2910 Py_ssize_t end = PY_SSIZE_T_MAX;
2911 PyObject *subobj;
2912 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002913
Jesus Cea44e81682011-04-20 16:39:15 +02002914 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002915 return NULL;
2916 if (PyTuple_Check(subobj)) {
2917 Py_ssize_t i;
2918 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2919 result = _string_tailmatch(self,
2920 PyTuple_GET_ITEM(subobj, i),
2921 start, end, -1);
2922 if (result == -1)
2923 return NULL;
2924 else if (result) {
2925 Py_RETURN_TRUE;
2926 }
2927 }
2928 Py_RETURN_FALSE;
2929 }
2930 result = _string_tailmatch(self, subobj, start, end, -1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002931 if (result == -1) {
2932 if (PyErr_ExceptionMatches(PyExc_TypeError))
2933 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2934 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002935 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002936 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002937 else
2938 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002939}
2940
2941
2942PyDoc_STRVAR(endswith__doc__,
2943"S.endswith(suffix[, start[, end]]) -> bool\n\
2944\n\
2945Return True if S ends with the specified suffix, False otherwise.\n\
2946With optional start, test S beginning at that position.\n\
2947With optional end, stop comparing S at that position.\n\
2948suffix can also be a tuple of strings to try.");
2949
2950static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002951string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002952{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002953 Py_ssize_t start = 0;
2954 Py_ssize_t end = PY_SSIZE_T_MAX;
2955 PyObject *subobj;
2956 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002957
Jesus Cea44e81682011-04-20 16:39:15 +02002958 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002959 return NULL;
2960 if (PyTuple_Check(subobj)) {
2961 Py_ssize_t i;
2962 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2963 result = _string_tailmatch(self,
2964 PyTuple_GET_ITEM(subobj, i),
2965 start, end, +1);
2966 if (result == -1)
2967 return NULL;
2968 else if (result) {
2969 Py_RETURN_TRUE;
2970 }
2971 }
2972 Py_RETURN_FALSE;
2973 }
2974 result = _string_tailmatch(self, subobj, start, end, +1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002975 if (result == -1) {
2976 if (PyErr_ExceptionMatches(PyExc_TypeError))
2977 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2978 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002979 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002980 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002981 else
2982 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002983}
2984
2985
2986PyDoc_STRVAR(encode__doc__,
2987"S.encode([encoding[,errors]]) -> object\n\
2988\n\
2989Encodes S using the codec registered for encoding. encoding defaults\n\
2990to the default encoding. errors may be given to set a different error\n\
2991handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2992a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2993'xmlcharrefreplace' as well as any other name registered with\n\
2994codecs.register_error that is able to handle UnicodeEncodeErrors.");
2995
2996static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00002997string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00002998{
Benjamin Peterson332d7212009-09-18 21:14:55 +00002999 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003000 char *encoding = NULL;
3001 char *errors = NULL;
3002 PyObject *v;
3003
Benjamin Peterson332d7212009-09-18 21:14:55 +00003004 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003005 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003006 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003007 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003008 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003009 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003010 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003011 PyErr_Format(PyExc_TypeError,
3012 "encoder did not return a string/unicode object "
3013 "(type=%.400s)",
3014 Py_TYPE(v)->tp_name);
3015 Py_DECREF(v);
3016 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003017 }
3018 return v;
3019
3020 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003021 return NULL;
3022}
3023
Christian Heimes44720832008-05-26 13:01:01 +00003024
3025PyDoc_STRVAR(decode__doc__,
3026"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003027\n\
Christian Heimes44720832008-05-26 13:01:01 +00003028Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003029to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003030handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3031a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003032as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003033able to handle UnicodeDecodeErrors.");
3034
3035static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003036string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003037{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003038 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003039 char *encoding = NULL;
3040 char *errors = NULL;
3041 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003042
Benjamin Peterson332d7212009-09-18 21:14:55 +00003043 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003044 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003045 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003046 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003047 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003048 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003049 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003050 PyErr_Format(PyExc_TypeError,
3051 "decoder did not return a string/unicode object "
3052 "(type=%.400s)",
3053 Py_TYPE(v)->tp_name);
3054 Py_DECREF(v);
3055 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003056 }
3057 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003058
Christian Heimes44720832008-05-26 13:01:01 +00003059 onError:
3060 return NULL;
3061}
3062
3063
3064PyDoc_STRVAR(expandtabs__doc__,
3065"S.expandtabs([tabsize]) -> string\n\
3066\n\
3067Return a copy of S where all tab characters are expanded using spaces.\n\
3068If tabsize is not given, a tab size of 8 characters is assumed.");
3069
3070static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003071string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003072{
3073 const char *e, *p, *qe;
3074 char *q;
3075 Py_ssize_t i, j, incr;
3076 PyObject *u;
3077 int tabsize = 8;
3078
3079 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003080 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003081
3082 /* First pass: determine size of output string */
3083 i = 0; /* chars up to and including most recent \n or \r */
3084 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003085 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Benjamin Peterson0e431b92014-03-30 19:16:44 -04003086 for (p = PyString_AS_STRING(self); p < e; p++) {
3087 if (*p == '\t') {
3088 if (tabsize > 0) {
3089 incr = tabsize - (j % tabsize);
3090 if (j > PY_SSIZE_T_MAX - incr)
3091 goto overflow1;
3092 j += incr;
3093 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003094 }
Benjamin Peterson0e431b92014-03-30 19:16:44 -04003095 else {
3096 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003097 goto overflow1;
Benjamin Peterson0e431b92014-03-30 19:16:44 -04003098 j++;
3099 if (*p == '\n' || *p == '\r') {
3100 if (i > PY_SSIZE_T_MAX - j)
3101 goto overflow1;
3102 i += j;
3103 j = 0;
3104 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003105 }
3106 }
Christian Heimes44720832008-05-26 13:01:01 +00003107
3108 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003109 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003110
3111 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003112 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003113 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003114 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003115
3116 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003117 q = PyString_AS_STRING(u); /* next output char */
3118 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003119
Benjamin Peterson8312ecc2014-03-30 19:23:24 -04003120 for (p = PyString_AS_STRING(self); p < e; p++) {
3121 if (*p == '\t') {
3122 if (tabsize > 0) {
3123 i = tabsize - (j % tabsize);
3124 j += i;
3125 while (i--) {
3126 if (q >= qe)
3127 goto overflow2;
3128 *q++ = ' ';
3129 }
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003130 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003131 }
Benjamin Peterson8312ecc2014-03-30 19:23:24 -04003132 else {
3133 if (q >= qe)
3134 goto overflow2;
3135 *q++ = *p;
3136 j++;
3137 if (*p == '\n' || *p == '\r')
3138 j = 0;
3139 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003140 }
Christian Heimes44720832008-05-26 13:01:01 +00003141
3142 return u;
3143
3144 overflow2:
3145 Py_DECREF(u);
3146 overflow1:
3147 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3148 return NULL;
3149}
3150
3151Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003152pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003153{
3154 PyObject *u;
3155
3156 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003157 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003158 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003159 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003160
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003161 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003162 Py_INCREF(self);
3163 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003164 }
3165
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003166 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003167 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003168 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003169 if (left)
3170 memset(PyString_AS_STRING(u), fill, left);
3171 Py_MEMCPY(PyString_AS_STRING(u) + left,
3172 PyString_AS_STRING(self),
3173 PyString_GET_SIZE(self));
3174 if (right)
3175 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3176 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003177 }
3178
3179 return u;
3180}
3181
3182PyDoc_STRVAR(ljust__doc__,
3183"S.ljust(width[, fillchar]) -> string\n"
3184"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003185"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003186"done using the specified fill character (default is a space).");
3187
3188static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003189string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003190{
3191 Py_ssize_t width;
3192 char fillchar = ' ';
3193
3194 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003195 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003196
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003197 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003198 Py_INCREF(self);
3199 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003200 }
3201
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003202 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003203}
3204
3205
3206PyDoc_STRVAR(rjust__doc__,
3207"S.rjust(width[, fillchar]) -> string\n"
3208"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003209"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003210"done using the specified fill character (default is a space)");
3211
3212static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003213string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003214{
3215 Py_ssize_t width;
3216 char fillchar = ' ';
3217
3218 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003219 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003220
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003221 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003222 Py_INCREF(self);
3223 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003224 }
3225
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003226 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003227}
3228
3229
3230PyDoc_STRVAR(center__doc__,
3231"S.center(width[, fillchar]) -> string\n"
3232"\n"
3233"Return S centered in a string of length width. Padding is\n"
3234"done using the specified fill character (default is a space)");
3235
3236static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003237string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003238{
3239 Py_ssize_t marg, left;
3240 Py_ssize_t width;
3241 char fillchar = ' ';
3242
3243 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003244 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003245
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003246 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003247 Py_INCREF(self);
3248 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003249 }
3250
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003251 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003252 left = marg / 2 + (marg & width & 1);
3253
3254 return pad(self, left, marg - left, fillchar);
3255}
3256
3257PyDoc_STRVAR(zfill__doc__,
3258"S.zfill(width) -> string\n"
3259"\n"
3260"Pad a numeric string S with zeros on the left, to fill a field\n"
3261"of the specified width. The string S is never truncated.");
3262
3263static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003264string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003265{
3266 Py_ssize_t fill;
3267 PyObject *s;
3268 char *p;
3269 Py_ssize_t width;
3270
3271 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003272 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003273
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003274 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003275 if (PyString_CheckExact(self)) {
3276 Py_INCREF(self);
3277 return (PyObject*) self;
3278 }
3279 else
3280 return PyString_FromStringAndSize(
Martin Panterca56dd42016-09-17 07:54:55 +00003281 PyString_AS_STRING(self),
3282 PyString_GET_SIZE(self)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003283 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003284 }
3285
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003286 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003287
Christian Heimes44720832008-05-26 13:01:01 +00003288 s = pad(self, fill, 0, '0');
3289
3290 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003291 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003292
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003293 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003294 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003295 /* move sign to beginning of string */
3296 p[0] = p[fill];
3297 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003298 }
3299
3300 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003301}
3302
Christian Heimes44720832008-05-26 13:01:01 +00003303PyDoc_STRVAR(isspace__doc__,
3304"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003305\n\
Christian Heimes44720832008-05-26 13:01:01 +00003306Return True if all characters in S are whitespace\n\
3307and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003308
Christian Heimes44720832008-05-26 13:01:01 +00003309static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003310string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003311{
Christian Heimes44720832008-05-26 13:01:01 +00003312 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003313 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003314 register const unsigned char *e;
3315
3316 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003317 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003318 isspace(*p))
3319 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003320
3321 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003322 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003323 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003324
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003325 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003326 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003327 if (!isspace(*p))
3328 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003329 }
Christian Heimes44720832008-05-26 13:01:01 +00003330 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003331}
3332
Christian Heimes44720832008-05-26 13:01:01 +00003333
3334PyDoc_STRVAR(isalpha__doc__,
3335"S.isalpha() -> bool\n\
3336\n\
3337Return True if all characters in S are alphabetic\n\
3338and there is at least one character in S, False otherwise.");
3339
3340static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003341string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003342{
Christian Heimes44720832008-05-26 13:01:01 +00003343 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003344 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003345 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003346
Christian Heimes44720832008-05-26 13:01:01 +00003347 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003348 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003349 isalpha(*p))
3350 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003351
3352 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003353 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003354 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003355
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003356 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003357 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003358 if (!isalpha(*p))
3359 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003360 }
Christian Heimes44720832008-05-26 13:01:01 +00003361 return PyBool_FromLong(1);
3362}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003363
Christian Heimes44720832008-05-26 13:01:01 +00003364
3365PyDoc_STRVAR(isalnum__doc__,
3366"S.isalnum() -> bool\n\
3367\n\
3368Return True if all characters in S are alphanumeric\n\
3369and there is at least one character in S, False otherwise.");
3370
3371static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003372string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003373{
3374 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003375 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003376 register const unsigned char *e;
3377
3378 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003379 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003380 isalnum(*p))
3381 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003382
3383 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003384 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003385 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003386
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003387 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003388 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003389 if (!isalnum(*p))
3390 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003391 }
3392 return PyBool_FromLong(1);
3393}
3394
3395
3396PyDoc_STRVAR(isdigit__doc__,
3397"S.isdigit() -> bool\n\
3398\n\
3399Return True if all characters in S are digits\n\
3400and there is at least one character in S, False otherwise.");
3401
3402static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003403string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003404{
3405 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003406 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003407 register const unsigned char *e;
3408
3409 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003410 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003411 isdigit(*p))
3412 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003413
3414 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003415 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003416 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003417
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003418 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003419 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003420 if (!isdigit(*p))
3421 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003422 }
3423 return PyBool_FromLong(1);
3424}
3425
3426
3427PyDoc_STRVAR(islower__doc__,
3428"S.islower() -> bool\n\
3429\n\
3430Return True if all cased characters in S are lowercase and there is\n\
3431at least one cased character in S, False otherwise.");
3432
3433static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003434string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003435{
3436 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003437 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003438 register const unsigned char *e;
3439 int cased;
3440
3441 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003442 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003443 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003444
3445 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003446 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003447 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003448
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003449 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003450 cased = 0;
3451 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003452 if (isupper(*p))
3453 return PyBool_FromLong(0);
3454 else if (!cased && islower(*p))
3455 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003456 }
3457 return PyBool_FromLong(cased);
3458}
3459
3460
3461PyDoc_STRVAR(isupper__doc__,
3462"S.isupper() -> bool\n\
3463\n\
3464Return True if all cased characters in S are uppercase and there is\n\
3465at least one cased character in S, False otherwise.");
3466
3467static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003468string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003469{
3470 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003471 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003472 register const unsigned char *e;
3473 int cased;
3474
3475 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003476 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003477 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003478
3479 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003480 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003481 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003482
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003483 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003484 cased = 0;
3485 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003486 if (islower(*p))
3487 return PyBool_FromLong(0);
3488 else if (!cased && isupper(*p))
3489 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003490 }
3491 return PyBool_FromLong(cased);
3492}
3493
3494
3495PyDoc_STRVAR(istitle__doc__,
3496"S.istitle() -> bool\n\
3497\n\
3498Return True if S is a titlecased string and there is at least one\n\
3499character in S, i.e. uppercase characters may only follow uncased\n\
3500characters and lowercase characters only cased ones. Return False\n\
3501otherwise.");
3502
3503static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003504string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003505{
3506 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003507 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003508 register const unsigned char *e;
3509 int cased, previous_is_cased;
3510
3511 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003512 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003513 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003514
3515 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003516 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003517 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003518
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003519 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003520 cased = 0;
3521 previous_is_cased = 0;
3522 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003523 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003524
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003525 if (isupper(ch)) {
3526 if (previous_is_cased)
3527 return PyBool_FromLong(0);
3528 previous_is_cased = 1;
3529 cased = 1;
3530 }
3531 else if (islower(ch)) {
3532 if (!previous_is_cased)
3533 return PyBool_FromLong(0);
3534 previous_is_cased = 1;
3535 cased = 1;
3536 }
3537 else
3538 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003539 }
3540 return PyBool_FromLong(cased);
3541}
3542
3543
3544PyDoc_STRVAR(splitlines__doc__,
Raymond Hettingeraad5b022012-06-02 01:42:58 -04003545"S.splitlines(keepends=False) -> list of strings\n\
Christian Heimes44720832008-05-26 13:01:01 +00003546\n\
3547Return a list of the lines in S, breaking at line boundaries.\n\
3548Line breaks are not included in the resulting list unless keepends\n\
3549is given and true.");
3550
3551static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003552string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003553{
Christian Heimes44720832008-05-26 13:01:01 +00003554 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003555
3556 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003557 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003558
Antoine Pitrou64672132010-01-13 07:55:48 +00003559 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003560 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3561 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003562 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003563}
3564
Robert Schuppenies51df0642008-06-01 16:16:17 +00003565PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003566"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003567
3568static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003569string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003570{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003571 Py_ssize_t res;
3572 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3573 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003574}
3575
Christian Heimes1a6387e2008-03-26 12:49:49 +00003576static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003577string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003579 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003580}
3581
Christian Heimes1a6387e2008-03-26 12:49:49 +00003582
Christian Heimes44720832008-05-26 13:01:01 +00003583#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003584
Christian Heimes44720832008-05-26 13:01:01 +00003585PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003586"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003587\n\
Eric Smith6c840852010-11-06 19:43:44 +00003588Return a formatted version of S, using substitutions from args and kwargs.\n\
3589The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003590
Eric Smithdc13b792008-05-30 18:10:04 +00003591static PyObject *
3592string__format__(PyObject* self, PyObject* args)
3593{
3594 PyObject *format_spec;
3595 PyObject *result = NULL;
3596 PyObject *tmp = NULL;
3597
3598 /* If 2.x, convert format_spec to the same type as value */
3599 /* This is to allow things like u''.format('') */
3600 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003601 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003602 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003603 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3604 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3605 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003606 }
3607 tmp = PyObject_Str(format_spec);
3608 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003609 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003610 format_spec = tmp;
3611
3612 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003613 PyString_AS_STRING(format_spec),
3614 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003615done:
3616 Py_XDECREF(tmp);
3617 return result;
3618}
3619
Christian Heimes44720832008-05-26 13:01:01 +00003620PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003621"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003622\n\
Eric Smith6c840852010-11-06 19:43:44 +00003623Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003624
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003625
Christian Heimes1a6387e2008-03-26 12:49:49 +00003626static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003627string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003628 /* Counterparts of the obsolete stropmodule functions; except
3629 string.maketrans(). */
3630 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3631 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3632 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3633 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3634 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3635 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3636 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3637 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3638 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3639 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3640 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3641 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3642 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3643 capitalize__doc__},
3644 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3645 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3646 endswith__doc__},
3647 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3648 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3649 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3650 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3651 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3652 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3653 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3654 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3655 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3656 rpartition__doc__},
3657 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3658 startswith__doc__},
3659 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3660 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3661 swapcase__doc__},
3662 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3663 translate__doc__},
3664 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3665 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3666 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3667 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3668 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3669 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3670 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3671 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3672 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3673 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3674 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3675 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3676 expandtabs__doc__},
3677 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3678 splitlines__doc__},
3679 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3680 sizeof__doc__},
3681 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3682 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003683};
3684
3685static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003686str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003687
Christian Heimes44720832008-05-26 13:01:01 +00003688static PyObject *
3689string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3690{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003691 PyObject *x = NULL;
3692 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003693
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003694 if (type != &PyString_Type)
3695 return str_subtype_new(type, args, kwds);
3696 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3697 return NULL;
3698 if (x == NULL)
3699 return PyString_FromString("");
3700 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003701}
3702
3703static PyObject *
3704str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3705{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003706 PyObject *tmp, *pnew;
3707 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003708
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003709 assert(PyType_IsSubtype(type, &PyString_Type));
3710 tmp = string_new(&PyString_Type, args, kwds);
3711 if (tmp == NULL)
3712 return NULL;
Serhiy Storchaka8d30ad72015-11-25 15:55:54 +02003713 assert(PyString_Check(tmp));
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003714 n = PyString_GET_SIZE(tmp);
3715 pnew = type->tp_alloc(type, n);
3716 if (pnew != NULL) {
3717 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3718 ((PyStringObject *)pnew)->ob_shash =
3719 ((PyStringObject *)tmp)->ob_shash;
3720 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3721 }
3722 Py_DECREF(tmp);
3723 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003724}
3725
3726static PyObject *
3727basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3728{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003729 PyErr_SetString(PyExc_TypeError,
3730 "The basestring type cannot be instantiated");
3731 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003732}
3733
3734static PyObject *
3735string_mod(PyObject *v, PyObject *w)
3736{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003737 if (!PyString_Check(v)) {
3738 Py_INCREF(Py_NotImplemented);
3739 return Py_NotImplemented;
3740 }
3741 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003742}
3743
3744PyDoc_STRVAR(basestring_doc,
3745"Type basestring cannot be instantiated; it is the base for str and unicode.");
3746
3747static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003748 0, /*nb_add*/
3749 0, /*nb_subtract*/
3750 0, /*nb_multiply*/
3751 0, /*nb_divide*/
3752 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003753};
3754
3755
3756PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003757 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3758 "basestring",
3759 0,
3760 0,
3761 0, /* tp_dealloc */
3762 0, /* tp_print */
3763 0, /* tp_getattr */
3764 0, /* tp_setattr */
3765 0, /* tp_compare */
3766 0, /* tp_repr */
3767 0, /* tp_as_number */
3768 0, /* tp_as_sequence */
3769 0, /* tp_as_mapping */
3770 0, /* tp_hash */
3771 0, /* tp_call */
3772 0, /* tp_str */
3773 0, /* tp_getattro */
3774 0, /* tp_setattro */
3775 0, /* tp_as_buffer */
3776 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3777 basestring_doc, /* tp_doc */
3778 0, /* tp_traverse */
3779 0, /* tp_clear */
3780 0, /* tp_richcompare */
3781 0, /* tp_weaklistoffset */
3782 0, /* tp_iter */
3783 0, /* tp_iternext */
3784 0, /* tp_methods */
3785 0, /* tp_members */
3786 0, /* tp_getset */
3787 &PyBaseObject_Type, /* tp_base */
3788 0, /* tp_dict */
3789 0, /* tp_descr_get */
3790 0, /* tp_descr_set */
3791 0, /* tp_dictoffset */
3792 0, /* tp_init */
3793 0, /* tp_alloc */
3794 basestring_new, /* tp_new */
3795 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003796};
3797
3798PyDoc_STRVAR(string_doc,
Chris Jerdonekad4b0002012-10-07 20:37:54 -07003799"str(object='') -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003800\n\
3801Return a nice string representation of the object.\n\
3802If the argument is a string, the return value is the same object.");
3803
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003804PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003805 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3806 "str",
3807 PyStringObject_SIZE,
3808 sizeof(char),
3809 string_dealloc, /* tp_dealloc */
3810 (printfunc)string_print, /* tp_print */
3811 0, /* tp_getattr */
3812 0, /* tp_setattr */
3813 0, /* tp_compare */
3814 string_repr, /* tp_repr */
3815 &string_as_number, /* tp_as_number */
3816 &string_as_sequence, /* tp_as_sequence */
3817 &string_as_mapping, /* tp_as_mapping */
3818 (hashfunc)string_hash, /* tp_hash */
3819 0, /* tp_call */
3820 string_str, /* tp_str */
3821 PyObject_GenericGetAttr, /* tp_getattro */
3822 0, /* tp_setattro */
3823 &string_as_buffer, /* tp_as_buffer */
3824 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3825 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3826 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3827 string_doc, /* tp_doc */
3828 0, /* tp_traverse */
3829 0, /* tp_clear */
3830 (richcmpfunc)string_richcompare, /* tp_richcompare */
3831 0, /* tp_weaklistoffset */
3832 0, /* tp_iter */
3833 0, /* tp_iternext */
3834 string_methods, /* tp_methods */
3835 0, /* tp_members */
3836 0, /* tp_getset */
3837 &PyBaseString_Type, /* tp_base */
3838 0, /* tp_dict */
3839 0, /* tp_descr_get */
3840 0, /* tp_descr_set */
3841 0, /* tp_dictoffset */
3842 0, /* tp_init */
3843 0, /* tp_alloc */
3844 string_new, /* tp_new */
3845 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003846};
3847
3848void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003849PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003850{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003851 register PyObject *v;
3852 if (*pv == NULL)
3853 return;
3854 if (w == NULL || !PyString_Check(*pv)) {
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02003855 Py_CLEAR(*pv);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003856 return;
3857 }
3858 v = string_concat((PyStringObject *) *pv, w);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03003859 Py_SETREF(*pv, v);
Christian Heimes44720832008-05-26 13:01:01 +00003860}
3861
3862void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003863PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003864{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003865 PyString_Concat(pv, w);
3866 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003867}
3868
3869
3870/* The following function breaks the notion that strings are immutable:
3871 it changes the size of a string. We get away with this only if there
3872 is only one module referencing the object. You can also think of it
3873 as creating a new string object and destroying the old one, only
3874 more efficiently. In any case, don't use this if the string may
3875 already be known to some other part of the code...
3876 Note that if there's not enough memory to resize the string, the original
3877 string object at *pv is deallocated, *pv is set to NULL, an "out of
3878 memory" exception is set, and -1 is returned. Else (on success) 0 is
3879 returned, and the value in *pv may or may not be the same as on input.
3880 As always, an extra byte is allocated for a trailing \0 byte (newsize
3881 does *not* include that), and a trailing \0 byte is stored.
3882*/
3883
3884int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003885_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003886{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003887 register PyObject *v;
3888 register PyStringObject *sv;
3889 v = *pv;
3890 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3891 PyString_CHECK_INTERNED(v)) {
3892 *pv = 0;
3893 Py_DECREF(v);
3894 PyErr_BadInternalCall();
3895 return -1;
3896 }
3897 /* XXX UNREF/NEWREF interface should be more symmetrical */
3898 _Py_DEC_REFTOTAL;
3899 _Py_ForgetReference(v);
3900 *pv = (PyObject *)
3901 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3902 if (*pv == NULL) {
3903 PyObject_Del(v);
3904 PyErr_NoMemory();
3905 return -1;
3906 }
3907 _Py_NewReference(*pv);
3908 sv = (PyStringObject *) *pv;
3909 Py_SIZE(sv) = newsize;
3910 sv->ob_sval[newsize] = '\0';
3911 sv->ob_shash = -1; /* invalidate cached hash value */
3912 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003913}
3914
3915/* Helpers for formatstring */
3916
3917Py_LOCAL_INLINE(PyObject *)
3918getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3919{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003920 Py_ssize_t argidx = *p_argidx;
3921 if (argidx < arglen) {
3922 (*p_argidx)++;
3923 if (arglen < 0)
3924 return args;
3925 else
3926 return PyTuple_GetItem(args, argidx);
3927 }
3928 PyErr_SetString(PyExc_TypeError,
3929 "not enough arguments for format string");
3930 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003931}
3932
/* Format flag bits, one per flag character of a % format specifier:
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
Christian Heimes44720832008-05-26 13:01:01 +00003945
Mark Dickinson18cfada2009-11-23 18:46:41 +00003946/* Returns a new reference to a PyString object, or NULL on failure. */
3947
3948static PyObject *
3949formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003950{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003951 char *p;
3952 PyObject *result;
3953 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003954
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003955 x = PyFloat_AsDouble(v);
3956 if (x == -1.0 && PyErr_Occurred()) {
3957 PyErr_Format(PyExc_TypeError, "float argument required, "
3958 "not %.200s", Py_TYPE(v)->tp_name);
3959 return NULL;
3960 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003961
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003962 if (prec < 0)
3963 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003964
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003965 p = PyOS_double_to_string(x, type, prec,
3966 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003967
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003968 if (p == NULL)
3969 return NULL;
3970 result = PyString_FromStringAndSize(p, strlen(p));
3971 PyMem_Free(p);
3972 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003973}
3974
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003975/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003976 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3977 * Python's regular ints.
3978 * Return value: a new PyString*, or NULL if error.
3979 * . *pbuf is set to point into it,
3980 * *plen set to the # of chars following that.
3981 * Caller must decref it when done using pbuf.
3982 * The string starting at *pbuf is of the form
3983 * "-"? ("0x" | "0X")? digit+
3984 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3985 * set in flags. The case of hex digits will be correct,
3986 * There will be at least prec digits, zero-filled on the left if
3987 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003988 * val object to be converted
3989 * flags bitmask of format flags; only F_ALT is looked at
3990 * prec minimum number of digits; 0-fill on left if needed
3991 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003992 *
3993 * CAUTION: o, x and X conversions on regular ints can never
3994 * produce a '-' sign, but can for Python's unbounded ints.
3995 */
3996PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003997_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003998 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00003999{
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004000 PyObject *result = NULL, *r1;
4001 const char *s;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004002 char *buf;
4003 Py_ssize_t i;
4004 int sign; /* 1 if '-', else 0 */
4005 int len; /* number of characters */
4006 Py_ssize_t llen;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004007 int numdigits; /* len == numnondigits + skipped + numdigits */
4008 int numnondigits, skipped, filled;
4009 const char *method;
Christian Heimes44720832008-05-26 13:01:01 +00004010
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004011 switch (type) {
4012 case 'd':
4013 case 'u':
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004014 method = "str";
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004015 result = Py_TYPE(val)->tp_str(val);
4016 break;
4017 case 'o':
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004018 method = "oct";
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004019 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4020 break;
4021 case 'x':
4022 case 'X':
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004023 method = "hex";
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004024 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4025 break;
4026 default:
4027 assert(!"'type' not in [duoxX]");
4028 }
4029 if (!result)
4030 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004031
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004032 if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004033 Py_DECREF(result);
4034 return NULL;
4035 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004036 if (llen > INT_MAX) {
4037 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004038 Py_DECREF(result);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004039 return NULL;
4040 }
4041 len = (int)llen;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004042 if (len > 0 && s[len-1] == 'L') {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004043 --len;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004044 if (len == 0)
4045 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004046 }
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004047 sign = s[0] == '-';
4048 numnondigits = sign;
Christian Heimes44720832008-05-26 13:01:01 +00004049
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004050 /* Need to skip 0x, 0X or 0. */
4051 skipped = 0;
4052 switch (type) {
4053 case 'o':
4054 if (s[sign] != '0')
4055 goto error;
4056 /* If 0 is only digit, leave it alone. */
4057 if ((flags & F_ALT) == 0 && len - sign > 1)
4058 skipped = 1;
4059 break;
4060 case 'x':
4061 case 'X':
4062 if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
4063 goto error;
4064 if ((flags & F_ALT) == 0)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004065 skipped = 2;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004066 else
4067 numnondigits += 2;
4068 break;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004069 }
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004070 numdigits = len - numnondigits - skipped;
4071 if (numdigits <= 0)
4072 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004073
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004074 filled = prec - numdigits;
4075 if (filled < 0)
4076 filled = 0;
4077 len = numnondigits + filled + numdigits;
4078
4079 /* To modify the string in-place, there can only be one reference. */
4080 if (skipped >= filled &&
4081 PyString_CheckExact(result) &&
4082 Py_REFCNT(result) == 1 &&
4083 !PyString_CHECK_INTERNED(result))
4084 {
4085 r1 = NULL;
4086 buf = (char *)s + skipped - filled;
4087 }
4088 else {
4089 r1 = result;
4090 result = PyString_FromStringAndSize(NULL, len);
4091 if (!result) {
4092 Py_DECREF(r1);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004093 return NULL;
4094 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004095 buf = PyString_AS_STRING(result);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004096 }
Christian Heimes44720832008-05-26 13:01:01 +00004097
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004098 for (i = numnondigits; --i >= 0;)
4099 buf[i] = s[i];
4100 buf += numnondigits;
4101 s += numnondigits + skipped;
4102 for (i = 0; i < filled; i++)
4103 *buf++ = '0';
4104 if (r1 == NULL) {
4105 assert(buf == s);
4106 buf += numdigits;
4107 }
4108 else {
4109 for (i = 0; i < numdigits; i++)
4110 *buf++ = *s++;
4111 }
4112 *buf = '\0';
4113 buf -= len;
4114 Py_XDECREF(r1);
4115
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004116 /* Fix up case for hex conversions. */
4117 if (type == 'X') {
4118 /* Need to convert all lower case letters to upper case.
4119 and need to convert 0x to 0X (and -0x to -0X). */
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004120 for (i = 0; i < len; i++) {
4121 if (buf[i] >= 'a' && buf[i] <= 'z')
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004122 buf[i] -= 'a'-'A';
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004123 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004124 }
4125 *pbuf = buf;
4126 *plen = len;
4127 return result;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004128
4129error:
4130 PyErr_Format(PyExc_ValueError,
4131 "%%%c format: invalid result of __%s__ (type=%.200s)",
4132 type, method, Py_TYPE(val)->tp_name);
4133 Py_DECREF(result);
4134 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004135}
4136
4137Py_LOCAL_INLINE(int)
4138formatint(char *buf, size_t buflen, int flags,
4139 int prec, int type, PyObject *v)
4140{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004141 /* fmt = '%#.' + `prec` + 'l' + `type`
4142 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4143 + 1 + 1 = 24 */
4144 char fmt[64]; /* plenty big enough! */
4145 char *sign;
4146 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004147
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004148 x = PyInt_AsLong(v);
4149 if (x == -1 && PyErr_Occurred()) {
4150 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4151 Py_TYPE(v)->tp_name);
4152 return -1;
4153 }
4154 if (x < 0 && type == 'u') {
4155 type = 'd';
4156 }
4157 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4158 sign = "-";
4159 else
4160 sign = "";
4161 if (prec < 0)
4162 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004163
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004164 if ((flags & F_ALT) &&
4165 (type == 'x' || type == 'X')) {
4166 /* When converting under %#x or %#X, there are a number
4167 * of issues that cause pain:
4168 * - when 0 is being converted, the C standard leaves off
4169 * the '0x' or '0X', which is inconsistent with other
4170 * %#x/%#X conversions and inconsistent with Python's
4171 * hex() function
4172 * - there are platforms that violate the standard and
4173 * convert 0 with the '0x' or '0X'
4174 * (Metrowerks, Compaq Tru64)
4175 * - there are platforms that give '0x' when converting
4176 * under %#X, but convert 0 in accordance with the
4177 * standard (OS/2 EMX)
4178 *
4179 * We can achieve the desired consistency by inserting our
4180 * own '0x' or '0X' prefix, and substituting %x/%X in place
4181 * of %#x/%#X.
4182 *
4183 * Note that this is the same approach as used in
4184 * formatint() in unicodeobject.c
4185 */
4186 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4187 sign, type, prec, type);
4188 }
4189 else {
4190 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4191 sign, (flags&F_ALT) ? "#" : "",
4192 prec, type);
4193 }
Christian Heimes44720832008-05-26 13:01:01 +00004194
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004195 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4196 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4197 */
4198 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4199 PyErr_SetString(PyExc_OverflowError,
4200 "formatted integer is too long (precision too large?)");
4201 return -1;
4202 }
4203 if (sign[0])
4204 PyOS_snprintf(buf, buflen, fmt, -x);
4205 else
4206 PyOS_snprintf(buf, buflen, fmt, x);
4207 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004208}
4209
4210Py_LOCAL_INLINE(int)
4211formatchar(char *buf, size_t buflen, PyObject *v)
4212{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004213 /* presume that the buffer is at least 2 characters long */
4214 if (PyString_Check(v)) {
4215 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4216 return -1;
4217 }
4218 else {
4219 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4220 return -1;
4221 }
4222 buf[1] = '\0';
4223 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004224}
4225
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the ints &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
4235
4236PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004237PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004238{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004239 char *fmt, *res;
4240 Py_ssize_t arglen, argidx;
4241 Py_ssize_t reslen, rescnt, fmtcnt;
4242 int args_owned = 0;
4243 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004244#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004245 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004246#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004247 PyObject *dict = NULL;
4248 if (format == NULL || !PyString_Check(format) || args == NULL) {
4249 PyErr_BadInternalCall();
4250 return NULL;
4251 }
4252 orig_args = args;
4253 fmt = PyString_AS_STRING(format);
4254 fmtcnt = PyString_GET_SIZE(format);
4255 reslen = rescnt = fmtcnt + 100;
4256 result = PyString_FromStringAndSize((char *)NULL, reslen);
4257 if (result == NULL)
4258 return NULL;
4259 res = PyString_AsString(result);
4260 if (PyTuple_Check(args)) {
4261 arglen = PyTuple_GET_SIZE(args);
4262 argidx = 0;
4263 }
4264 else {
4265 arglen = -1;
4266 argidx = -2;
4267 }
Benjamin Petersonda2c7eb2013-03-23 22:32:00 -05004268 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4269 !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004270 dict = args;
4271 while (--fmtcnt >= 0) {
4272 if (*fmt != '%') {
4273 if (--rescnt < 0) {
4274 rescnt = fmtcnt + 100;
4275 reslen += rescnt;
4276 if (_PyString_Resize(&result, reslen))
4277 return NULL;
4278 res = PyString_AS_STRING(result)
4279 + reslen - rescnt;
4280 --rescnt;
4281 }
4282 *res++ = *fmt++;
4283 }
4284 else {
4285 /* Got a format specifier */
4286 int flags = 0;
4287 Py_ssize_t width = -1;
4288 int prec = -1;
4289 int c = '\0';
4290 int fill;
4291 int isnumok;
4292 PyObject *v = NULL;
4293 PyObject *temp = NULL;
4294 char *pbuf;
4295 int sign;
4296 Py_ssize_t len;
4297 char formatbuf[FORMATBUFLEN];
4298 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004299#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004300 char *fmt_start = fmt;
4301 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004302#endif
4303
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004304 fmt++;
4305 if (*fmt == '(') {
4306 char *keystart;
4307 Py_ssize_t keylen;
4308 PyObject *key;
4309 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004310
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004311 if (dict == NULL) {
4312 PyErr_SetString(PyExc_TypeError,
4313 "format requires a mapping");
4314 goto error;
4315 }
4316 ++fmt;
4317 --fmtcnt;
4318 keystart = fmt;
4319 /* Skip over balanced parentheses */
4320 while (pcount > 0 && --fmtcnt >= 0) {
4321 if (*fmt == ')')
4322 --pcount;
4323 else if (*fmt == '(')
4324 ++pcount;
4325 fmt++;
4326 }
4327 keylen = fmt - keystart - 1;
4328 if (fmtcnt < 0 || pcount > 0) {
4329 PyErr_SetString(PyExc_ValueError,
4330 "incomplete format key");
4331 goto error;
4332 }
4333 key = PyString_FromStringAndSize(keystart,
4334 keylen);
4335 if (key == NULL)
4336 goto error;
4337 if (args_owned) {
4338 Py_DECREF(args);
4339 args_owned = 0;
4340 }
4341 args = PyObject_GetItem(dict, key);
4342 Py_DECREF(key);
4343 if (args == NULL) {
4344 goto error;
4345 }
4346 args_owned = 1;
4347 arglen = -1;
4348 argidx = -2;
4349 }
4350 while (--fmtcnt >= 0) {
4351 switch (c = *fmt++) {
4352 case '-': flags |= F_LJUST; continue;
4353 case '+': flags |= F_SIGN; continue;
4354 case ' ': flags |= F_BLANK; continue;
4355 case '#': flags |= F_ALT; continue;
4356 case '0': flags |= F_ZERO; continue;
4357 }
4358 break;
4359 }
4360 if (c == '*') {
4361 v = getnextarg(args, arglen, &argidx);
4362 if (v == NULL)
4363 goto error;
4364 if (!PyInt_Check(v)) {
4365 PyErr_SetString(PyExc_TypeError,
4366 "* wants int");
4367 goto error;
4368 }
Serhiy Storchaka926f3a32013-01-19 23:35:46 +02004369 width = PyInt_AsSsize_t(v);
4370 if (width == -1 && PyErr_Occurred())
4371 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004372 if (width < 0) {
4373 flags |= F_LJUST;
4374 width = -width;
4375 }
4376 if (--fmtcnt >= 0)
4377 c = *fmt++;
4378 }
4379 else if (c >= 0 && isdigit(c)) {
4380 width = c - '0';
4381 while (--fmtcnt >= 0) {
4382 c = Py_CHARMASK(*fmt++);
4383 if (!isdigit(c))
4384 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004385 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004386 PyErr_SetString(
4387 PyExc_ValueError,
4388 "width too big");
4389 goto error;
4390 }
4391 width = width*10 + (c - '0');
4392 }
4393 }
4394 if (c == '.') {
4395 prec = 0;
4396 if (--fmtcnt >= 0)
4397 c = *fmt++;
4398 if (c == '*') {
4399 v = getnextarg(args, arglen, &argidx);
4400 if (v == NULL)
4401 goto error;
4402 if (!PyInt_Check(v)) {
4403 PyErr_SetString(
4404 PyExc_TypeError,
4405 "* wants int");
4406 goto error;
4407 }
Serhiy Storchaka926f3a32013-01-19 23:35:46 +02004408 prec = _PyInt_AsInt(v);
4409 if (prec == -1 && PyErr_Occurred())
4410 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004411 if (prec < 0)
4412 prec = 0;
4413 if (--fmtcnt >= 0)
4414 c = *fmt++;
4415 }
4416 else if (c >= 0 && isdigit(c)) {
4417 prec = c - '0';
4418 while (--fmtcnt >= 0) {
4419 c = Py_CHARMASK(*fmt++);
4420 if (!isdigit(c))
4421 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004422 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004423 PyErr_SetString(
4424 PyExc_ValueError,
4425 "prec too big");
4426 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004427 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004428 prec = prec*10 + (c - '0');
4429 }
4430 }
4431 } /* prec */
4432 if (fmtcnt >= 0) {
4433 if (c == 'h' || c == 'l' || c == 'L') {
4434 if (--fmtcnt >= 0)
4435 c = *fmt++;
4436 }
4437 }
4438 if (fmtcnt < 0) {
4439 PyErr_SetString(PyExc_ValueError,
4440 "incomplete format");
4441 goto error;
4442 }
4443 if (c != '%') {
4444 v = getnextarg(args, arglen, &argidx);
4445 if (v == NULL)
4446 goto error;
4447 }
4448 sign = 0;
4449 fill = ' ';
4450 switch (c) {
4451 case '%':
4452 pbuf = "%";
4453 len = 1;
4454 break;
4455 case 's':
4456#ifdef Py_USING_UNICODE
4457 if (PyUnicode_Check(v)) {
4458 fmt = fmt_start;
4459 argidx = argidx_start;
4460 goto unicode;
4461 }
4462#endif
4463 temp = _PyObject_Str(v);
4464#ifdef Py_USING_UNICODE
4465 if (temp != NULL && PyUnicode_Check(temp)) {
4466 Py_DECREF(temp);
4467 fmt = fmt_start;
4468 argidx = argidx_start;
4469 goto unicode;
4470 }
4471#endif
4472 /* Fall through */
4473 case 'r':
4474 if (c == 'r')
4475 temp = PyObject_Repr(v);
4476 if (temp == NULL)
4477 goto error;
4478 if (!PyString_Check(temp)) {
4479 PyErr_SetString(PyExc_TypeError,
4480 "%s argument has non-string str()");
4481 Py_DECREF(temp);
4482 goto error;
4483 }
4484 pbuf = PyString_AS_STRING(temp);
4485 len = PyString_GET_SIZE(temp);
4486 if (prec >= 0 && len > prec)
4487 len = prec;
4488 break;
4489 case 'i':
4490 case 'd':
4491 case 'u':
4492 case 'o':
4493 case 'x':
4494 case 'X':
4495 if (c == 'i')
4496 c = 'd';
4497 isnumok = 0;
4498 if (PyNumber_Check(v)) {
4499 PyObject *iobj=NULL;
4500
4501 if (PyInt_Check(v) || (PyLong_Check(v))) {
4502 iobj = v;
4503 Py_INCREF(iobj);
4504 }
4505 else {
4506 iobj = PyNumber_Int(v);
Benjamin Petersona708adf2013-01-02 12:21:32 -06004507 if (iobj==NULL) {
Benjamin Peterson8f53ded2013-01-02 12:25:15 -06004508 PyErr_Clear();
4509 iobj = PyNumber_Long(v);
4510 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004511 }
4512 if (iobj!=NULL) {
4513 if (PyInt_Check(iobj)) {
4514 isnumok = 1;
4515 pbuf = formatbuf;
4516 len = formatint(pbuf,
4517 sizeof(formatbuf),
4518 flags, prec, c, iobj);
4519 Py_DECREF(iobj);
4520 if (len < 0)
4521 goto error;
4522 sign = 1;
4523 }
4524 else if (PyLong_Check(iobj)) {
4525 int ilen;
4526
4527 isnumok = 1;
4528 temp = _PyString_FormatLong(iobj, flags,
4529 prec, c, &pbuf, &ilen);
4530 Py_DECREF(iobj);
4531 len = ilen;
4532 if (!temp)
4533 goto error;
4534 sign = 1;
4535 }
4536 else {
4537 Py_DECREF(iobj);
4538 }
4539 }
4540 }
4541 if (!isnumok) {
4542 PyErr_Format(PyExc_TypeError,
4543 "%%%c format: a number is required, "
4544 "not %.200s", c, Py_TYPE(v)->tp_name);
4545 goto error;
4546 }
4547 if (flags & F_ZERO)
4548 fill = '0';
4549 break;
4550 case 'e':
4551 case 'E':
4552 case 'f':
4553 case 'F':
4554 case 'g':
4555 case 'G':
4556 temp = formatfloat(v, flags, prec, c);
4557 if (temp == NULL)
4558 goto error;
4559 pbuf = PyString_AS_STRING(temp);
4560 len = PyString_GET_SIZE(temp);
4561 sign = 1;
4562 if (flags & F_ZERO)
4563 fill = '0';
4564 break;
4565 case 'c':
4566#ifdef Py_USING_UNICODE
4567 if (PyUnicode_Check(v)) {
4568 fmt = fmt_start;
4569 argidx = argidx_start;
4570 goto unicode;
4571 }
4572#endif
4573 pbuf = formatbuf;
4574 len = formatchar(pbuf, sizeof(formatbuf), v);
4575 if (len < 0)
4576 goto error;
4577 break;
4578 default:
4579 PyErr_Format(PyExc_ValueError,
4580 "unsupported format character '%c' (0x%x) "
4581 "at index %zd",
4582 c, c,
4583 (Py_ssize_t)(fmt - 1 -
4584 PyString_AsString(format)));
4585 goto error;
4586 }
4587 if (sign) {
4588 if (*pbuf == '-' || *pbuf == '+') {
4589 sign = *pbuf++;
4590 len--;
4591 }
4592 else if (flags & F_SIGN)
4593 sign = '+';
4594 else if (flags & F_BLANK)
4595 sign = ' ';
4596 else
4597 sign = 0;
4598 }
4599 if (width < len)
4600 width = len;
4601 if (rescnt - (sign != 0) < width) {
4602 reslen -= rescnt;
4603 rescnt = width + fmtcnt + 100;
4604 reslen += rescnt;
4605 if (reslen < 0) {
4606 Py_DECREF(result);
4607 Py_XDECREF(temp);
4608 return PyErr_NoMemory();
4609 }
4610 if (_PyString_Resize(&result, reslen)) {
4611 Py_XDECREF(temp);
4612 return NULL;
4613 }
4614 res = PyString_AS_STRING(result)
4615 + reslen - rescnt;
4616 }
4617 if (sign) {
4618 if (fill != ' ')
4619 *res++ = sign;
4620 rescnt--;
4621 if (width > len)
4622 width--;
4623 }
4624 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4625 assert(pbuf[0] == '0');
4626 assert(pbuf[1] == c);
4627 if (fill != ' ') {
4628 *res++ = *pbuf++;
4629 *res++ = *pbuf++;
4630 }
4631 rescnt -= 2;
4632 width -= 2;
4633 if (width < 0)
4634 width = 0;
4635 len -= 2;
4636 }
4637 if (width > len && !(flags & F_LJUST)) {
4638 do {
4639 --rescnt;
4640 *res++ = fill;
4641 } while (--width > len);
4642 }
4643 if (fill == ' ') {
4644 if (sign)
4645 *res++ = sign;
4646 if ((flags & F_ALT) &&
4647 (c == 'x' || c == 'X')) {
4648 assert(pbuf[0] == '0');
4649 assert(pbuf[1] == c);
4650 *res++ = *pbuf++;
4651 *res++ = *pbuf++;
4652 }
4653 }
4654 Py_MEMCPY(res, pbuf, len);
4655 res += len;
4656 rescnt -= len;
4657 while (--width >= len) {
4658 --rescnt;
4659 *res++ = ' ';
4660 }
4661 if (dict && (argidx < arglen) && c != '%') {
4662 PyErr_SetString(PyExc_TypeError,
4663 "not all arguments converted during string formatting");
4664 Py_XDECREF(temp);
4665 goto error;
4666 }
4667 Py_XDECREF(temp);
4668 } /* '%' */
4669 } /* until end */
4670 if (argidx < arglen && !dict) {
4671 PyErr_SetString(PyExc_TypeError,
4672 "not all arguments converted during string formatting");
4673 goto error;
4674 }
4675 if (args_owned) {
4676 Py_DECREF(args);
4677 }
4678 if (_PyString_Resize(&result, reslen - rescnt))
4679 return NULL;
4680 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004681
4682#ifdef Py_USING_UNICODE
4683 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004684 if (args_owned) {
4685 Py_DECREF(args);
4686 args_owned = 0;
4687 }
4688 /* Fiddle args right (remove the first argidx arguments) */
4689 if (PyTuple_Check(orig_args) && argidx > 0) {
4690 PyObject *v;
4691 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4692 v = PyTuple_New(n);
4693 if (v == NULL)
4694 goto error;
4695 while (--n >= 0) {
4696 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4697 Py_INCREF(w);
4698 PyTuple_SET_ITEM(v, n, w);
4699 }
4700 args = v;
4701 } else {
4702 Py_INCREF(orig_args);
4703 args = orig_args;
4704 }
4705 args_owned = 1;
4706 /* Take what we have of the result and let the Unicode formatting
4707 function format the rest of the input. */
4708 rescnt = res - PyString_AS_STRING(result);
4709 if (_PyString_Resize(&result, rescnt))
4710 goto error;
4711 fmtcnt = PyString_GET_SIZE(format) - \
4712 (fmt - PyString_AS_STRING(format));
4713 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4714 if (format == NULL)
4715 goto error;
4716 v = PyUnicode_Format(format, args);
4717 Py_DECREF(format);
4718 if (v == NULL)
4719 goto error;
4720 /* Paste what we have (result) to what the Unicode formatting
4721 function returned (v) and return the result (or error) */
4722 w = PyUnicode_Concat(result, v);
4723 Py_DECREF(result);
4724 Py_DECREF(v);
4725 Py_DECREF(args);
4726 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004727#endif /* Py_USING_UNICODE */
4728
4729 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004730 Py_DECREF(result);
4731 if (args_owned) {
4732 Py_DECREF(args);
4733 }
4734 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004735}
4736
4737void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004738PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004739{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004740 register PyStringObject *s = (PyStringObject *)(*p);
4741 PyObject *t;
4742 if (s == NULL || !PyString_Check(s))
4743 Py_FatalError("PyString_InternInPlace: strings only please!");
4744 /* If it's a string subclass, we don't really know what putting
4745 it in the interned dict might do. */
4746 if (!PyString_CheckExact(s))
4747 return;
4748 if (PyString_CHECK_INTERNED(s))
4749 return;
4750 if (interned == NULL) {
4751 interned = PyDict_New();
4752 if (interned == NULL) {
4753 PyErr_Clear(); /* Don't leave an exception */
4754 return;
4755 }
4756 }
4757 t = PyDict_GetItem(interned, (PyObject *)s);
4758 if (t) {
4759 Py_INCREF(t);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03004760 Py_SETREF(*p, t);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004761 return;
4762 }
Christian Heimes44720832008-05-26 13:01:01 +00004763
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004764 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4765 PyErr_Clear();
4766 return;
4767 }
4768 /* The two references in interned are not counted by refcnt.
4769 The string deallocator will take care of this */
4770 Py_REFCNT(s) -= 2;
4771 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004772}
4773
4774void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004775PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004776{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004777 PyString_InternInPlace(p);
4778 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4779 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4780 Py_INCREF(*p);
4781 }
Christian Heimes44720832008-05-26 13:01:01 +00004782}
4783
4784
4785PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004786PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004787{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004788 PyObject *s = PyString_FromString(cp);
4789 if (s == NULL)
4790 return NULL;
4791 PyString_InternInPlace(&s);
4792 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004793}
4794
4795void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004796PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004797{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004798 int i;
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02004799 for (i = 0; i < UCHAR_MAX + 1; i++)
4800 Py_CLEAR(characters[i]);
4801 Py_CLEAR(nullstring);
Christian Heimes44720832008-05-26 13:01:01 +00004802}
4803
4804void _Py_ReleaseInternedStrings(void)
4805{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004806 PyObject *keys;
4807 PyStringObject *s;
4808 Py_ssize_t i, n;
4809 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004810
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004811 if (interned == NULL || !PyDict_Check(interned))
4812 return;
4813 keys = PyDict_Keys(interned);
4814 if (keys == NULL || !PyList_Check(keys)) {
4815 PyErr_Clear();
4816 return;
4817 }
Christian Heimes44720832008-05-26 13:01:01 +00004818
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004819 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4820 detector, interned strings are not forcibly deallocated; rather, we
4821 give them their stolen references back, and then clear and DECREF
4822 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004823
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004824 n = PyList_GET_SIZE(keys);
4825 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4826 n);
4827 for (i = 0; i < n; i++) {
4828 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4829 switch (s->ob_sstate) {
4830 case SSTATE_NOT_INTERNED:
4831 /* XXX Shouldn't happen */
4832 break;
4833 case SSTATE_INTERNED_IMMORTAL:
4834 Py_REFCNT(s) += 1;
4835 immortal_size += Py_SIZE(s);
4836 break;
4837 case SSTATE_INTERNED_MORTAL:
4838 Py_REFCNT(s) += 2;
4839 mortal_size += Py_SIZE(s);
4840 break;
4841 default:
4842 Py_FatalError("Inconsistent interned string state.");
4843 }
4844 s->ob_sstate = SSTATE_NOT_INTERNED;
4845 }
4846 fprintf(stderr, "total size of all interned strings: "
4847 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4848 "mortal/immortal\n", mortal_size, immortal_size);
4849 Py_DECREF(keys);
4850 PyDict_Clear(interned);
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02004851 Py_CLEAR(interned);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004852}