blob: 59d22e76946bb22f8dcc088f5fbb891899ece88c [file] [log] [blame]
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001/* String (str/bytes) object implementation */
Christian Heimes1a6387e2008-03-26 12:49:49 +00002
3#define PY_SSIZE_T_CLEAN
Christian Heimes44720832008-05-26 13:01:01 +00004
Christian Heimes1a6387e2008-03-26 12:49:49 +00005#include "Python.h"
Christian Heimes44720832008-05-26 13:01:01 +00006#include <ctype.h>
Mark Dickinson826f3fe2008-12-05 21:55:28 +00007#include <stddef.h>
Christian Heimes44720832008-05-26 13:01:01 +00008
9#ifdef COUNT_ALLOCS
Martin v. Löwisb90304a2009-01-07 18:40:40 +000010Py_ssize_t null_strings, one_strings;
Christian Heimes44720832008-05-26 13:01:01 +000011#endif
12
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000013static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
Christian Heimes44720832008-05-26 13:01:01 +000015
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is that to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
Mark Dickinson826f3fe2008-12-05 21:55:28 +000026/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27 for a string of length n should request PyStringObject_SIZE + n bytes.
28
29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30 3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
Christian Heimes44720832008-05-26 13:01:01 +000034/*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000035 For PyString_FromString(), the parameter `str' points to a null-terminated
Christian Heimes44720832008-05-26 13:01:01 +000036 string containing exactly `size' bytes.
37
Martin Panter200a6152016-05-30 04:04:50 +000038 For PyString_FromStringAndSize(), the parameter `str' is
Christian Heimes44720832008-05-26 13:01:01 +000039 either NULL or else points to a string containing at least `size' bytes.
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000040 For PyString_FromStringAndSize(), the string in the `str' parameter does
Christian Heimes44720832008-05-26 13:01:01 +000041 not have to be null-terminated. (Therefore it is safe to construct a
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000042 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
Christian Heimes44720832008-05-26 13:01:01 +000044 bytes (setting the last byte to the null terminating character) and you can
45 fill in the data yourself. If `str' is non-NULL then the resulting
46 PyString object must be treated as immutable and you must not fill in nor
47 alter the data yourself, since the strings may be shared.
48
49 The PyObject member `op->ob_size', which denotes the number of "extra
50 items" in a variable-size object, will contain the number of bytes
Eli Bendersky72de2052011-03-24 22:38:25 +020051 allocated for string data, not counting the null terminating character.
52 It is therefore equal to the `size' parameter (for
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000053 PyString_FromStringAndSize()) or the length of the string in the `str'
54 parameter (for PyString_FromString()).
Christian Heimes44720832008-05-26 13:01:01 +000055*/
56PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +000057PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Christian Heimes1a6387e2008-03-26 12:49:49 +000058{
Antoine Pitrouc83ea132010-05-09 14:46:46 +000059 register PyStringObject *op;
60 if (size < 0) {
61 PyErr_SetString(PyExc_SystemError,
62 "Negative size passed to PyString_FromStringAndSize");
63 return NULL;
64 }
65 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +000066#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000068#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069 Py_INCREF(op);
70 return (PyObject *)op;
71 }
72 if (size == 1 && str != NULL &&
73 (op = characters[*str & UCHAR_MAX]) != NULL)
74 {
Christian Heimes44720832008-05-26 13:01:01 +000075#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +000076 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +000077#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +000078 Py_INCREF(op);
79 return (PyObject *)op;
80 }
Christian Heimes44720832008-05-26 13:01:01 +000081
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83 PyErr_SetString(PyExc_OverflowError, "string is too large");
84 return NULL;
85 }
Neal Norwitze7d8be82008-07-31 17:17:14 +000086
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 /* Inline PyObject_NewVar */
88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89 if (op == NULL)
90 return PyErr_NoMemory();
Martin Panter646b5282016-06-21 23:58:05 +000091 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
Antoine Pitrouc83ea132010-05-09 14:46:46 +000092 op->ob_shash = -1;
93 op->ob_sstate = SSTATE_NOT_INTERNED;
94 if (str != NULL)
95 Py_MEMCPY(op->ob_sval, str, size);
96 op->ob_sval[size] = '\0';
97 /* share short strings */
98 if (size == 0) {
99 PyObject *t = (PyObject *)op;
100 PyString_InternInPlace(&t);
101 op = (PyStringObject *)t;
102 nullstring = op;
103 Py_INCREF(op);
104 } else if (size == 1 && str != NULL) {
105 PyObject *t = (PyObject *)op;
106 PyString_InternInPlace(&t);
107 op = (PyStringObject *)t;
108 characters[*str & UCHAR_MAX] = op;
109 Py_INCREF(op);
110 }
111 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000112}
113
Christian Heimes44720832008-05-26 13:01:01 +0000114PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000115PyString_FromString(const char *str)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000116{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 register size_t size;
118 register PyStringObject *op;
Christian Heimes44720832008-05-26 13:01:01 +0000119
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000120 assert(str != NULL);
121 size = strlen(str);
122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123 PyErr_SetString(PyExc_OverflowError,
124 "string is too long for a Python string");
125 return NULL;
126 }
127 if (size == 0 && (op = nullstring) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000128#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000129 null_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000130#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 Py_INCREF(op);
132 return (PyObject *)op;
133 }
134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
Christian Heimes44720832008-05-26 13:01:01 +0000135#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000136 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +0000137#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000138 Py_INCREF(op);
139 return (PyObject *)op;
140 }
Christian Heimes44720832008-05-26 13:01:01 +0000141
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000142 /* Inline PyObject_NewVar */
143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144 if (op == NULL)
145 return PyErr_NoMemory();
Martin Panter646b5282016-06-21 23:58:05 +0000146 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000147 op->ob_shash = -1;
148 op->ob_sstate = SSTATE_NOT_INTERNED;
149 Py_MEMCPY(op->ob_sval, str, size+1);
150 /* share short strings */
151 if (size == 0) {
152 PyObject *t = (PyObject *)op;
153 PyString_InternInPlace(&t);
154 op = (PyStringObject *)t;
155 nullstring = op;
156 Py_INCREF(op);
157 } else if (size == 1) {
158 PyObject *t = (PyObject *)op;
159 PyString_InternInPlace(&t);
160 op = (PyStringObject *)t;
161 characters[*str & UCHAR_MAX] = op;
162 Py_INCREF(op);
163 }
164 return (PyObject *) op;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000165}
166
Christian Heimes44720832008-05-26 13:01:01 +0000167PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000168PyString_FromFormatV(const char *format, va_list vargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000169{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000170 va_list count;
171 Py_ssize_t n = 0;
172 const char* f;
173 char *s;
174 PyObject* string;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000175
Christian Heimes44720832008-05-26 13:01:01 +0000176#ifdef VA_LIST_IS_ARRAY
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000177 Py_MEMCPY(count, vargs, sizeof(va_list));
Christian Heimes44720832008-05-26 13:01:01 +0000178#else
179#ifdef __va_copy
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000180 __va_copy(count, vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000181#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000182 count = vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000183#endif
184#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 /* step 1: figure out how large a buffer we need */
186 for (f = format; *f; f++) {
187 if (*f == '%') {
Mark Dickinson82864d12009-11-15 16:18:58 +0000188#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000189 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000190#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 const char* p = f;
192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193 ;
Christian Heimes44720832008-05-26 13:01:01 +0000194
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196 * they don't affect the amount of space we reserve.
197 */
198 if (*f == 'l') {
199 if (f[1] == 'd' || f[1] == 'u') {
200 ++f;
201 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000202#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 else if (f[1] == 'l' &&
204 (f[2] == 'd' || f[2] == 'u')) {
205 longlongflag = 1;
206 f += 2;
207 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000208#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 }
210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211 ++f;
212 }
Christian Heimes44720832008-05-26 13:01:01 +0000213
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000214 switch (*f) {
215 case 'c':
216 (void)va_arg(count, int);
217 /* fall through... */
218 case '%':
219 n++;
220 break;
221 case 'd': case 'u': case 'i': case 'x':
222 (void) va_arg(count, int);
Mark Dickinson82864d12009-11-15 16:18:58 +0000223#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 /* Need at most
225 ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226 plus 1 for the sign. 53/22 is an upper
227 bound for log10(256). */
228 if (longlongflag)
229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230 else
Mark Dickinson82864d12009-11-15 16:18:58 +0000231#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000232 /* 20 bytes is enough to hold a 64-bit
233 integer. Decimal takes the most
234 space. This isn't enough for
235 octal. */
236 n += 20;
Mark Dickinson82864d12009-11-15 16:18:58 +0000237
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000238 break;
239 case 's':
240 s = va_arg(count, char*);
241 n += strlen(s);
242 break;
243 case 'p':
244 (void) va_arg(count, int);
245 /* maximum 64-bit pointer representation:
246 * 0xffffffffffffffff
247 * so 19 characters is enough.
248 * XXX I count 18 -- what's the extra for?
249 */
250 n += 19;
251 break;
252 default:
253 /* if we stumble upon an unknown
254 formatting code, copy the rest of
255 the format string to the output
256 string. (we cannot just skip the
257 code, since there's no way to know
258 what's in the argument list) */
259 n += strlen(p);
260 goto expand;
261 }
262 } else
263 n++;
264 }
Christian Heimes44720832008-05-26 13:01:01 +0000265 expand:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000266 /* step 2: fill the buffer */
267 /* Since we've analyzed how much space we need for the worst case,
268 use sprintf directly instead of the slower PyOS_snprintf. */
269 string = PyString_FromStringAndSize(NULL, n);
270 if (!string)
271 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000272
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000273 s = PyString_AsString(string);
Christian Heimes44720832008-05-26 13:01:01 +0000274
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000275 for (f = format; *f; f++) {
276 if (*f == '%') {
277 const char* p = f++;
278 Py_ssize_t i;
279 int longflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000280#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 int longlongflag = 0;
Mark Dickinson82864d12009-11-15 16:18:58 +0000282#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 int size_tflag = 0;
284 /* parse the width.precision part (we're only
285 interested in the precision value, if any) */
286 n = 0;
287 while (isdigit(Py_CHARMASK(*f)))
288 n = (n*10) + *f++ - '0';
289 if (*f == '.') {
290 f++;
291 n = 0;
292 while (isdigit(Py_CHARMASK(*f)))
293 n = (n*10) + *f++ - '0';
294 }
295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296 f++;
297 /* Handle %ld, %lu, %lld and %llu. */
298 if (*f == 'l') {
299 if (f[1] == 'd' || f[1] == 'u') {
300 longflag = 1;
301 ++f;
302 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000303#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000304 else if (f[1] == 'l' &&
305 (f[2] == 'd' || f[2] == 'u')) {
306 longlongflag = 1;
307 f += 2;
308 }
Mark Dickinson82864d12009-11-15 16:18:58 +0000309#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 }
311 /* handle the size_t flag. */
312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313 size_tflag = 1;
314 ++f;
315 }
Christian Heimes44720832008-05-26 13:01:01 +0000316
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000317 switch (*f) {
318 case 'c':
319 *s++ = va_arg(vargs, int);
320 break;
321 case 'd':
322 if (longflag)
323 sprintf(s, "%ld", va_arg(vargs, long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000324#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 else if (longlongflag)
326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000328#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 else if (size_tflag)
330 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331 va_arg(vargs, Py_ssize_t));
332 else
333 sprintf(s, "%d", va_arg(vargs, int));
334 s += strlen(s);
335 break;
336 case 'u':
337 if (longflag)
338 sprintf(s, "%lu",
339 va_arg(vargs, unsigned long));
Mark Dickinson82864d12009-11-15 16:18:58 +0000340#ifdef HAVE_LONG_LONG
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 else if (longlongflag)
342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343 va_arg(vargs, PY_LONG_LONG));
Mark Dickinson82864d12009-11-15 16:18:58 +0000344#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000345 else if (size_tflag)
346 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347 va_arg(vargs, size_t));
348 else
349 sprintf(s, "%u",
350 va_arg(vargs, unsigned int));
351 s += strlen(s);
352 break;
353 case 'i':
354 sprintf(s, "%i", va_arg(vargs, int));
355 s += strlen(s);
356 break;
357 case 'x':
358 sprintf(s, "%x", va_arg(vargs, int));
359 s += strlen(s);
360 break;
361 case 's':
362 p = va_arg(vargs, char*);
363 i = strlen(p);
364 if (n > 0 && i > n)
365 i = n;
366 Py_MEMCPY(s, p, i);
367 s += i;
368 break;
369 case 'p':
370 sprintf(s, "%p", va_arg(vargs, void*));
371 /* %p is ill-defined: ensure leading 0x. */
372 if (s[1] == 'X')
373 s[1] = 'x';
374 else if (s[1] != 'x') {
375 memmove(s+2, s, strlen(s)+1);
376 s[0] = '0';
377 s[1] = 'x';
378 }
379 s += strlen(s);
380 break;
381 case '%':
382 *s++ = '%';
383 break;
384 default:
385 strcpy(s, p);
386 s += strlen(s);
387 goto end;
388 }
389 } else
390 *s++ = *f;
391 }
Christian Heimes44720832008-05-26 13:01:01 +0000392
393 end:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395 return NULL;
396 return string;
Christian Heimes44720832008-05-26 13:01:01 +0000397}
398
399PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000400PyString_FromFormat(const char *format, ...)
Christian Heimes44720832008-05-26 13:01:01 +0000401{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000402 PyObject* ret;
403 va_list vargs;
Christian Heimes44720832008-05-26 13:01:01 +0000404
405#ifdef HAVE_STDARG_PROTOTYPES
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000406 va_start(vargs, format);
Christian Heimes44720832008-05-26 13:01:01 +0000407#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 va_start(vargs);
Christian Heimes44720832008-05-26 13:01:01 +0000409#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 ret = PyString_FromFormatV(format, vargs);
411 va_end(vargs);
412 return ret;
Christian Heimes44720832008-05-26 13:01:01 +0000413}
414
415
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000416PyObject *PyString_Decode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 Py_ssize_t size,
418 const char *encoding,
419 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000420{
421 PyObject *v, *str;
422
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000423 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000424 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000425 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000426 v = PyString_AsDecodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000427 Py_DECREF(str);
428 return v;
429}
430
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000431PyObject *PyString_AsDecodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000432 const char *encoding,
433 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000434{
435 PyObject *v;
436
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000437 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000438 PyErr_BadArgument();
439 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000440 }
441
Christian Heimes44720832008-05-26 13:01:01 +0000442 if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000444 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000445#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000446 PyErr_SetString(PyExc_ValueError, "no encoding specified");
447 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000448#endif
Christian Heimes1a6387e2008-03-26 12:49:49 +0000449 }
Christian Heimes44720832008-05-26 13:01:01 +0000450
451 /* Decode via the codec registry */
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300452 v = _PyCodec_DecodeText(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000453 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000454 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000455
456 return v;
457
458 onError:
459 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000460}
461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000462PyObject *PyString_AsDecodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 const char *encoding,
464 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000465{
Christian Heimes44720832008-05-26 13:01:01 +0000466 PyObject *v;
467
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000468 v = PyString_AsDecodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000469 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000470 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000471
472#ifdef Py_USING_UNICODE
473 /* Convert Unicode to a string using the default encoding */
474 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000475 PyObject *temp = v;
476 v = PyUnicode_AsEncodedString(v, NULL, NULL);
477 Py_DECREF(temp);
478 if (v == NULL)
479 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000480 }
Christian Heimes44720832008-05-26 13:01:01 +0000481#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000482 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000483 PyErr_Format(PyExc_TypeError,
484 "decoder did not return a string object (type=%.400s)",
485 Py_TYPE(v)->tp_name);
486 Py_DECREF(v);
487 goto onError;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000488 }
Christian Heimes44720832008-05-26 13:01:01 +0000489
490 return v;
491
492 onError:
493 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000494}
495
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000496PyObject *PyString_Encode(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000497 Py_ssize_t size,
498 const char *encoding,
499 const char *errors)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000500{
Christian Heimes44720832008-05-26 13:01:01 +0000501 PyObject *v, *str;
502
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000503 str = PyString_FromStringAndSize(s, size);
Christian Heimes44720832008-05-26 13:01:01 +0000504 if (str == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000505 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000506 v = PyString_AsEncodedString(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000507 Py_DECREF(str);
508 return v;
509}
510
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000511PyObject *PyString_AsEncodedObject(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 const char *encoding,
513 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000514{
515 PyObject *v;
516
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000517 if (!PyString_Check(str)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000518 PyErr_BadArgument();
519 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000520 }
521
522 if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000524 encoding = PyUnicode_GetDefaultEncoding();
Christian Heimes44720832008-05-26 13:01:01 +0000525#else
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000526 PyErr_SetString(PyExc_ValueError, "no encoding specified");
527 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000528#endif
529 }
530
531 /* Encode via the codec registry */
Serhiy Storchakac7797dc2015-05-31 20:21:00 +0300532 v = _PyCodec_EncodeText(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000533 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000534 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000535
536 return v;
537
538 onError:
539 return NULL;
540}
541
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000542PyObject *PyString_AsEncodedString(PyObject *str,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000543 const char *encoding,
544 const char *errors)
Christian Heimes44720832008-05-26 13:01:01 +0000545{
546 PyObject *v;
547
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000548 v = PyString_AsEncodedObject(str, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +0000549 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000550 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000551
552#ifdef Py_USING_UNICODE
553 /* Convert Unicode to a string using the default encoding */
554 if (PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000555 PyObject *temp = v;
556 v = PyUnicode_AsEncodedString(v, NULL, NULL);
557 Py_DECREF(temp);
558 if (v == NULL)
559 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000560 }
561#endif
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000562 if (!PyString_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +0000563 PyErr_Format(PyExc_TypeError,
564 "encoder did not return a string object (type=%.400s)",
565 Py_TYPE(v)->tp_name);
566 Py_DECREF(v);
567 goto onError;
Christian Heimes44720832008-05-26 13:01:01 +0000568 }
569
570 return v;
571
572 onError:
573 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000574}
575
576static void
Christian Heimes44720832008-05-26 13:01:01 +0000577string_dealloc(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 switch (PyString_CHECK_INTERNED(op)) {
580 case SSTATE_NOT_INTERNED:
581 break;
Christian Heimes44720832008-05-26 13:01:01 +0000582
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 case SSTATE_INTERNED_MORTAL:
584 /* revive dead object temporarily for DelItem */
585 Py_REFCNT(op) = 3;
586 if (PyDict_DelItem(interned, op) != 0)
587 Py_FatalError(
588 "deletion of interned string failed");
589 break;
Christian Heimes44720832008-05-26 13:01:01 +0000590
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 case SSTATE_INTERNED_IMMORTAL:
592 Py_FatalError("Immortal interned string died.");
Christian Heimes44720832008-05-26 13:01:01 +0000593
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000594 default:
595 Py_FatalError("Inconsistent interned string state.");
596 }
597 Py_TYPE(op)->tp_free(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000598}
599
Christian Heimes44720832008-05-26 13:01:01 +0000600/* Unescape a backslash-escaped string. If unicode is non-zero,
601 the string is a u-literal. If recode_encoding is non-zero,
602 the string is UTF-8 encoded and should be re-encoded in the
603 specified encoding. */
604
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000605PyObject *PyString_DecodeEscape(const char *s,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 Py_ssize_t len,
607 const char *errors,
608 Py_ssize_t unicode,
609 const char *recode_encoding)
Christian Heimes44720832008-05-26 13:01:01 +0000610{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000611 int c;
612 char *p, *buf;
613 const char *end;
614 PyObject *v;
Jay Bosamiyac3c9db82017-06-18 22:11:03 +0530615 Py_ssize_t newlen;
616 /* Check for integer overflow */
617 if (recode_encoding && (len > PY_SSIZE_T_MAX / 4)) {
618 PyErr_SetString(PyExc_OverflowError, "string is too large");
619 return NULL;
620 }
621 newlen = recode_encoding ? 4*len:len;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000622 v = PyString_FromStringAndSize((char *)NULL, newlen);
623 if (v == NULL)
624 return NULL;
625 p = buf = PyString_AsString(v);
626 end = s + len;
627 while (s < end) {
628 if (*s != '\\') {
629 non_esc:
Christian Heimes44720832008-05-26 13:01:01 +0000630#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000631 if (recode_encoding && (*s & 0x80)) {
632 PyObject *u, *w;
633 char *r;
634 const char* t;
635 Py_ssize_t rn;
636 t = s;
637 /* Decode non-ASCII bytes as UTF-8. */
638 while (t < end && (*t & 0x80)) t++;
639 u = PyUnicode_DecodeUTF8(s, t - s, errors);
640 if(!u) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000641
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000642 /* Recode them in target encoding. */
643 w = PyUnicode_AsEncodedString(
644 u, recode_encoding, errors);
645 Py_DECREF(u);
646 if (!w) goto failed;
Christian Heimes44720832008-05-26 13:01:01 +0000647
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000648 /* Append bytes to output buffer. */
649 assert(PyString_Check(w));
650 r = PyString_AS_STRING(w);
651 rn = PyString_GET_SIZE(w);
652 Py_MEMCPY(p, r, rn);
653 p += rn;
654 Py_DECREF(w);
655 s = t;
656 } else {
657 *p++ = *s++;
658 }
Christian Heimes44720832008-05-26 13:01:01 +0000659#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000660 *p++ = *s++;
Christian Heimes44720832008-05-26 13:01:01 +0000661#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000662 continue;
663 }
664 s++;
665 if (s==end) {
666 PyErr_SetString(PyExc_ValueError,
667 "Trailing \\ in string");
668 goto failed;
669 }
670 switch (*s++) {
671 /* XXX This assumes ASCII! */
672 case '\n': break;
673 case '\\': *p++ = '\\'; break;
674 case '\'': *p++ = '\''; break;
675 case '\"': *p++ = '\"'; break;
676 case 'b': *p++ = '\b'; break;
677 case 'f': *p++ = '\014'; break; /* FF */
678 case 't': *p++ = '\t'; break;
679 case 'n': *p++ = '\n'; break;
680 case 'r': *p++ = '\r'; break;
681 case 'v': *p++ = '\013'; break; /* VT */
682 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
683 case '0': case '1': case '2': case '3':
684 case '4': case '5': case '6': case '7':
685 c = s[-1] - '0';
686 if (s < end && '0' <= *s && *s <= '7') {
687 c = (c<<3) + *s++ - '0';
688 if (s < end && '0' <= *s && *s <= '7')
689 c = (c<<3) + *s++ - '0';
690 }
691 *p++ = c;
692 break;
693 case 'x':
694 if (s+1 < end &&
695 isxdigit(Py_CHARMASK(s[0])) &&
696 isxdigit(Py_CHARMASK(s[1])))
697 {
698 unsigned int x = 0;
699 c = Py_CHARMASK(*s);
700 s++;
701 if (isdigit(c))
702 x = c - '0';
703 else if (islower(c))
704 x = 10 + c - 'a';
705 else
706 x = 10 + c - 'A';
707 x = x << 4;
708 c = Py_CHARMASK(*s);
709 s++;
710 if (isdigit(c))
711 x += c - '0';
712 else if (islower(c))
713 x += 10 + c - 'a';
714 else
715 x += 10 + c - 'A';
716 *p++ = x;
717 break;
718 }
719 if (!errors || strcmp(errors, "strict") == 0) {
720 PyErr_SetString(PyExc_ValueError,
721 "invalid \\x escape");
722 goto failed;
723 }
724 if (strcmp(errors, "replace") == 0) {
725 *p++ = '?';
726 } else if (strcmp(errors, "ignore") == 0)
727 /* do nothing */;
728 else {
729 PyErr_Format(PyExc_ValueError,
730 "decoding error; "
731 "unknown error handling code: %.400s",
732 errors);
733 goto failed;
734 }
Serhiy Storchaka01b3a082013-01-25 23:30:50 +0200735 /* skip \x */
736 if (s < end && isxdigit(Py_CHARMASK(s[0])))
737 s++; /* and a hexdigit */
738 break;
Christian Heimes44720832008-05-26 13:01:01 +0000739#ifndef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000740 case 'u':
741 case 'U':
742 case 'N':
743 if (unicode) {
744 PyErr_SetString(PyExc_ValueError,
745 "Unicode escapes not legal "
746 "when Unicode disabled");
747 goto failed;
748 }
Christian Heimes44720832008-05-26 13:01:01 +0000749#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000750 default:
751 *p++ = '\\';
752 s--;
Ezio Melotti24b07bc2011-03-15 18:55:01 +0200753 goto non_esc; /* an arbitrary number of unescaped
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000754 UTF-8 bytes may follow. */
755 }
756 }
Kristján Valur Jónssonbe580f22014-04-25 09:51:21 +0000757 if (p-buf < newlen)
758 _PyString_Resize(&v, p - buf); /* v is cleared on error */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000759 return v;
Christian Heimes44720832008-05-26 13:01:01 +0000760 failed:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000761 Py_DECREF(v);
762 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +0000763}
764
765/* -------------------------------------------------------------------- */
766/* object api */
767
Christian Heimes1a6387e2008-03-26 12:49:49 +0000768static Py_ssize_t
Christian Heimes44720832008-05-26 13:01:01 +0000769string_getsize(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000770{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000771 char *s;
772 Py_ssize_t len;
773 if (PyString_AsStringAndSize(op, &s, &len))
774 return -1;
775 return len;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000776}
777
Christian Heimes44720832008-05-26 13:01:01 +0000778static /*const*/ char *
779string_getbuffer(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000780{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000781 char *s;
782 Py_ssize_t len;
783 if (PyString_AsStringAndSize(op, &s, &len))
784 return NULL;
785 return s;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000786}
787
788Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000789PyString_Size(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000790{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000791 if (!PyString_Check(op))
792 return string_getsize(op);
793 return Py_SIZE(op);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000794}
795
Christian Heimes44720832008-05-26 13:01:01 +0000796/*const*/ char *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000797PyString_AsString(register PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000798{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000799 if (!PyString_Check(op))
800 return string_getbuffer(op);
801 return ((PyStringObject *)op) -> ob_sval;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000802}
803
804int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000805PyString_AsStringAndSize(register PyObject *obj,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 register char **s,
807 register Py_ssize_t *len)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000808{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000809 if (s == NULL) {
810 PyErr_BadInternalCall();
811 return -1;
812 }
Christian Heimes1a6387e2008-03-26 12:49:49 +0000813
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000814 if (!PyString_Check(obj)) {
Christian Heimes44720832008-05-26 13:01:01 +0000815#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000816 if (PyUnicode_Check(obj)) {
817 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
818 if (obj == NULL)
819 return -1;
820 }
821 else
Christian Heimes1a6387e2008-03-26 12:49:49 +0000822#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000823 {
824 PyErr_Format(PyExc_TypeError,
825 "expected string or Unicode object, "
826 "%.200s found", Py_TYPE(obj)->tp_name);
827 return -1;
828 }
829 }
Christian Heimes44720832008-05-26 13:01:01 +0000830
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000831 *s = PyString_AS_STRING(obj);
832 if (len != NULL)
833 *len = PyString_GET_SIZE(obj);
834 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
835 PyErr_SetString(PyExc_TypeError,
836 "expected string without null bytes");
837 return -1;
838 }
839 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000840}
841
Christian Heimes1a6387e2008-03-26 12:49:49 +0000842/* -------------------------------------------------------------------- */
843/* Methods */
844
Christian Heimes44720832008-05-26 13:01:01 +0000845#include "stringlib/stringdefs.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000846#include "stringlib/fastsearch.h"
Christian Heimes44720832008-05-26 13:01:01 +0000847
Christian Heimes1a6387e2008-03-26 12:49:49 +0000848#include "stringlib/count.h"
849#include "stringlib/find.h"
850#include "stringlib/partition.h"
Antoine Pitrou64672132010-01-13 07:55:48 +0000851#include "stringlib/split.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000852
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000853#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
Christian Heimes44720832008-05-26 13:01:01 +0000854#include "stringlib/localeutil.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +0000855
Christian Heimes1a6387e2008-03-26 12:49:49 +0000856
857
858static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000859string_print(PyStringObject *op, FILE *fp, int flags)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000860{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000861 Py_ssize_t i, str_len;
862 char c;
863 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000864
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000865 /* XXX Ought to check for interrupts when writing long strings */
866 if (! PyString_CheckExact(op)) {
867 int ret;
868 /* A str subclass may have its own __str__ method. */
869 op = (PyStringObject *) PyObject_Str((PyObject *)op);
870 if (op == NULL)
871 return -1;
872 ret = string_print(op, fp, flags);
873 Py_DECREF(op);
874 return ret;
875 }
876 if (flags & Py_PRINT_RAW) {
877 char *data = op->ob_sval;
878 Py_ssize_t size = Py_SIZE(op);
879 Py_BEGIN_ALLOW_THREADS
880 while (size > INT_MAX) {
881 /* Very long strings cannot be written atomically.
882 * But don't write exactly INT_MAX bytes at a time
883 * to avoid memory aligment issues.
884 */
885 const int chunk_size = INT_MAX & ~0x3FFF;
886 fwrite(data, 1, chunk_size, fp);
887 data += chunk_size;
888 size -= chunk_size;
889 }
Christian Heimes44720832008-05-26 13:01:01 +0000890#ifdef __VMS
Ronald Oussoren3687e802013-07-11 13:33:55 +0200891 if (size) fwrite(data, (size_t)size, 1, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000892#else
Ronald Oussoren3687e802013-07-11 13:33:55 +0200893 fwrite(data, 1, (size_t)size, fp);
Christian Heimes44720832008-05-26 13:01:01 +0000894#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000895 Py_END_ALLOW_THREADS
896 return 0;
897 }
Christian Heimes44720832008-05-26 13:01:01 +0000898
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000899 /* figure out which quote to use; single is preferred */
900 quote = '\'';
901 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
902 !memchr(op->ob_sval, '"', Py_SIZE(op)))
903 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000904
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000905 str_len = Py_SIZE(op);
906 Py_BEGIN_ALLOW_THREADS
907 fputc(quote, fp);
908 for (i = 0; i < str_len; i++) {
909 /* Since strings are immutable and the caller should have a
Martin Panter3d36f0f2016-07-28 02:37:04 +0000910 reference, accessing the internal buffer should not be an issue
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000911 with the GIL released. */
912 c = op->ob_sval[i];
913 if (c == quote || c == '\\')
914 fprintf(fp, "\\%c", c);
915 else if (c == '\t')
916 fprintf(fp, "\\t");
917 else if (c == '\n')
918 fprintf(fp, "\\n");
919 else if (c == '\r')
920 fprintf(fp, "\\r");
921 else if (c < ' ' || c >= 0x7f)
922 fprintf(fp, "\\x%02x", c & 0xff);
923 else
924 fputc(c, fp);
925 }
926 fputc(quote, fp);
927 Py_END_ALLOW_THREADS
928 return 0;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000929}
930
Christian Heimes44720832008-05-26 13:01:01 +0000931PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +0000932PyString_Repr(PyObject *obj, int smartquotes)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000933{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000934 register PyStringObject* op = (PyStringObject*) obj;
Benjamin Petersonf8c4b3a2014-09-29 19:01:18 -0400935 size_t newsize;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000936 PyObject *v;
Benjamin Petersonf8c4b3a2014-09-29 19:01:18 -0400937 if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000938 PyErr_SetString(PyExc_OverflowError,
939 "string is too large to make repr");
940 return NULL;
941 }
Benjamin Petersonf8c4b3a2014-09-29 19:01:18 -0400942 newsize = 2 + 4*Py_SIZE(op);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000943 v = PyString_FromStringAndSize((char *)NULL, newsize);
944 if (v == NULL) {
945 return NULL;
946 }
947 else {
948 register Py_ssize_t i;
949 register char c;
950 register char *p;
951 int quote;
Christian Heimes1a6387e2008-03-26 12:49:49 +0000952
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000953 /* figure out which quote to use; single is preferred */
954 quote = '\'';
955 if (smartquotes &&
956 memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
957 !memchr(op->ob_sval, '"', Py_SIZE(op)))
958 quote = '"';
Christian Heimes44720832008-05-26 13:01:01 +0000959
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000960 p = PyString_AS_STRING(v);
961 *p++ = quote;
962 for (i = 0; i < Py_SIZE(op); i++) {
963 /* There's at least enough room for a hex escape
964 and a closing quote. */
965 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
966 c = op->ob_sval[i];
967 if (c == quote || c == '\\')
968 *p++ = '\\', *p++ = c;
969 else if (c == '\t')
970 *p++ = '\\', *p++ = 't';
971 else if (c == '\n')
972 *p++ = '\\', *p++ = 'n';
973 else if (c == '\r')
974 *p++ = '\\', *p++ = 'r';
975 else if (c < ' ' || c >= 0x7f) {
976 /* For performance, we don't want to call
977 PyOS_snprintf here (extra layers of
978 function call). */
979 sprintf(p, "\\x%02x", c & 0xff);
980 p += 4;
981 }
982 else
983 *p++ = c;
984 }
985 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
986 *p++ = quote;
987 *p = '\0';
988 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
989 return NULL;
990 return v;
991 }
Christian Heimes44720832008-05-26 13:01:01 +0000992}
Christian Heimes1a6387e2008-03-26 12:49:49 +0000993
994static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +0000995string_repr(PyObject *op)
Christian Heimes1a6387e2008-03-26 12:49:49 +0000996{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000997 return PyString_Repr(op, 1);
Christian Heimes1a6387e2008-03-26 12:49:49 +0000998}
999
Christian Heimes1a6387e2008-03-26 12:49:49 +00001000static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00001001string_str(PyObject *s)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001002{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001003 assert(PyString_Check(s));
1004 if (PyString_CheckExact(s)) {
1005 Py_INCREF(s);
1006 return s;
1007 }
1008 else {
1009 /* Subtype -- return genuine string with the same value. */
1010 PyStringObject *t = (PyStringObject *) s;
1011 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1012 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00001013}
1014
Christian Heimes44720832008-05-26 13:01:01 +00001015static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001016string_length(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001017{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001018 return Py_SIZE(a);
Christian Heimes44720832008-05-26 13:01:01 +00001019}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001020
Christian Heimes44720832008-05-26 13:01:01 +00001021static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001022string_concat(register PyStringObject *a, register PyObject *bb)
Christian Heimes44720832008-05-26 13:01:01 +00001023{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001024 register Py_ssize_t size;
1025 register PyStringObject *op;
1026 if (!PyString_Check(bb)) {
Christian Heimes44720832008-05-26 13:01:01 +00001027#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001028 if (PyUnicode_Check(bb))
1029 return PyUnicode_Concat((PyObject *)a, bb);
Christian Heimes44720832008-05-26 13:01:01 +00001030#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001031 if (PyByteArray_Check(bb))
1032 return PyByteArray_Concat((PyObject *)a, bb);
1033 PyErr_Format(PyExc_TypeError,
1034 "cannot concatenate 'str' and '%.200s' objects",
1035 Py_TYPE(bb)->tp_name);
1036 return NULL;
1037 }
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001038#define b ((PyStringObject *)bb)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001039 /* Optimize cases with empty left or right operand */
1040 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1041 PyString_CheckExact(a) && PyString_CheckExact(b)) {
1042 if (Py_SIZE(a) == 0) {
1043 Py_INCREF(bb);
1044 return bb;
1045 }
1046 Py_INCREF(a);
1047 return (PyObject *)a;
1048 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001049 /* Check that string sizes are not negative, to prevent an
1050 overflow in cases where we are passed incorrectly-created
1051 strings with negative lengths (due to a bug in other code).
1052 */
1053 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1054 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1055 PyErr_SetString(PyExc_OverflowError,
1056 "strings are too large to concat");
1057 return NULL;
1058 }
Serhiy Storchaka373773d2016-07-12 15:46:57 +03001059 size = Py_SIZE(a) + Py_SIZE(b);
Mark Dickinson826f3fe2008-12-05 21:55:28 +00001060
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001061 /* Inline PyObject_NewVar */
1062 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1063 PyErr_SetString(PyExc_OverflowError,
1064 "strings are too large to concat");
1065 return NULL;
1066 }
1067 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1068 if (op == NULL)
1069 return PyErr_NoMemory();
Martin Panter646b5282016-06-21 23:58:05 +00001070 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001071 op->ob_shash = -1;
1072 op->ob_sstate = SSTATE_NOT_INTERNED;
1073 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1074 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1075 op->ob_sval[size] = '\0';
1076 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001077#undef b
1078}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001079
Christian Heimes44720832008-05-26 13:01:01 +00001080static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001081string_repeat(register PyStringObject *a, register Py_ssize_t n)
Christian Heimes44720832008-05-26 13:01:01 +00001082{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001083 register Py_ssize_t i;
1084 register Py_ssize_t j;
1085 register Py_ssize_t size;
1086 register PyStringObject *op;
1087 size_t nbytes;
1088 if (n < 0)
1089 n = 0;
Serhiy Storchaka373773d2016-07-12 15:46:57 +03001090 /* watch out for overflows: the size can overflow Py_ssize_t,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001091 * and the # of bytes needed can overflow size_t
1092 */
Serhiy Storchaka373773d2016-07-12 15:46:57 +03001093 if (n && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001094 PyErr_SetString(PyExc_OverflowError,
1095 "repeated string is too long");
1096 return NULL;
1097 }
Serhiy Storchaka373773d2016-07-12 15:46:57 +03001098 size = Py_SIZE(a) * n;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001099 if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1100 Py_INCREF(a);
1101 return (PyObject *)a;
1102 }
1103 nbytes = (size_t)size;
1104 if (nbytes + PyStringObject_SIZE <= nbytes) {
1105 PyErr_SetString(PyExc_OverflowError,
1106 "repeated string is too long");
1107 return NULL;
1108 }
1109 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1110 if (op == NULL)
1111 return PyErr_NoMemory();
Martin Panter646b5282016-06-21 23:58:05 +00001112 (void)PyObject_INIT_VAR(op, &PyString_Type, size);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001113 op->ob_shash = -1;
1114 op->ob_sstate = SSTATE_NOT_INTERNED;
1115 op->ob_sval[size] = '\0';
1116 if (Py_SIZE(a) == 1 && n > 0) {
1117 memset(op->ob_sval, a->ob_sval[0] , n);
1118 return (PyObject *) op;
1119 }
1120 i = 0;
1121 if (i < size) {
1122 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1123 i = Py_SIZE(a);
1124 }
1125 while (i < size) {
1126 j = (i <= size-i) ? i : size-i;
1127 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1128 i += j;
1129 }
1130 return (PyObject *) op;
Christian Heimes44720832008-05-26 13:01:01 +00001131}
Christian Heimes1a6387e2008-03-26 12:49:49 +00001132
Christian Heimes44720832008-05-26 13:01:01 +00001133/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1134
1135static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001136string_slice(register PyStringObject *a, register Py_ssize_t i,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001137 register Py_ssize_t j)
Christian Heimes44720832008-05-26 13:01:01 +00001138 /* j -- may be negative! */
1139{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001140 if (i < 0)
1141 i = 0;
1142 if (j < 0)
1143 j = 0; /* Avoid signed/unsigned bug in next line */
1144 if (j > Py_SIZE(a))
1145 j = Py_SIZE(a);
1146 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1147 /* It's the same as a */
1148 Py_INCREF(a);
1149 return (PyObject *)a;
1150 }
1151 if (j < i)
1152 j = i;
1153 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001154}
1155
1156static int
1157string_contains(PyObject *str_obj, PyObject *sub_obj)
1158{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001159 if (!PyString_CheckExact(sub_obj)) {
Christian Heimes44720832008-05-26 13:01:01 +00001160#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001161 if (PyUnicode_Check(sub_obj))
1162 return PyUnicode_Contains(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001163#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001164 if (!PyString_Check(sub_obj)) {
1165 PyErr_Format(PyExc_TypeError,
1166 "'in <string>' requires string as left operand, "
1167 "not %.200s", Py_TYPE(sub_obj)->tp_name);
1168 return -1;
1169 }
1170 }
Christian Heimes44720832008-05-26 13:01:01 +00001171
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001172 return stringlib_contains_obj(str_obj, sub_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001173}
1174
1175static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001176string_item(PyStringObject *a, register Py_ssize_t i)
Christian Heimes44720832008-05-26 13:01:01 +00001177{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001178 char pchar;
1179 PyObject *v;
1180 if (i < 0 || i >= Py_SIZE(a)) {
1181 PyErr_SetString(PyExc_IndexError, "string index out of range");
1182 return NULL;
1183 }
1184 pchar = a->ob_sval[i];
1185 v = (PyObject *)characters[pchar & UCHAR_MAX];
1186 if (v == NULL)
1187 v = PyString_FromStringAndSize(&pchar, 1);
1188 else {
Christian Heimes44720832008-05-26 13:01:01 +00001189#ifdef COUNT_ALLOCS
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001190 one_strings++;
Christian Heimes44720832008-05-26 13:01:01 +00001191#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001192 Py_INCREF(v);
1193 }
1194 return v;
Christian Heimes44720832008-05-26 13:01:01 +00001195}
1196
1197static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001198string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Christian Heimes44720832008-05-26 13:01:01 +00001199{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001200 int c;
1201 Py_ssize_t len_a, len_b;
1202 Py_ssize_t min_len;
1203 PyObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00001204
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001205 /* Make sure both arguments are strings. */
1206 if (!(PyString_Check(a) && PyString_Check(b))) {
1207 result = Py_NotImplemented;
1208 goto out;
1209 }
1210 if (a == b) {
1211 switch (op) {
1212 case Py_EQ:case Py_LE:case Py_GE:
1213 result = Py_True;
1214 goto out;
1215 case Py_NE:case Py_LT:case Py_GT:
1216 result = Py_False;
1217 goto out;
1218 }
1219 }
1220 if (op == Py_EQ) {
1221 /* Supporting Py_NE here as well does not save
1222 much time, since Py_NE is rarely used. */
1223 if (Py_SIZE(a) == Py_SIZE(b)
1224 && (a->ob_sval[0] == b->ob_sval[0]
1225 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1226 result = Py_True;
1227 } else {
1228 result = Py_False;
1229 }
1230 goto out;
1231 }
1232 len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1233 min_len = (len_a < len_b) ? len_a : len_b;
1234 if (min_len > 0) {
1235 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1236 if (c==0)
1237 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1238 } else
1239 c = 0;
1240 if (c == 0)
1241 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1242 switch (op) {
1243 case Py_LT: c = c < 0; break;
1244 case Py_LE: c = c <= 0; break;
1245 case Py_EQ: assert(0); break; /* unreachable */
1246 case Py_NE: c = c != 0; break;
1247 case Py_GT: c = c > 0; break;
1248 case Py_GE: c = c >= 0; break;
1249 default:
1250 result = Py_NotImplemented;
1251 goto out;
1252 }
1253 result = c ? Py_True : Py_False;
Christian Heimes44720832008-05-26 13:01:01 +00001254 out:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001255 Py_INCREF(result);
1256 return result;
Christian Heimes44720832008-05-26 13:01:01 +00001257}
1258
1259int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001260_PyString_Eq(PyObject *o1, PyObject *o2)
Christian Heimes44720832008-05-26 13:01:01 +00001261{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001262 PyStringObject *a = (PyStringObject*) o1;
1263 PyStringObject *b = (PyStringObject*) o2;
1264 return Py_SIZE(a) == Py_SIZE(b)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001265 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
Christian Heimes44720832008-05-26 13:01:01 +00001266}
1267
1268static long
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001269string_hash(PyStringObject *a)
Christian Heimes44720832008-05-26 13:01:01 +00001270{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001271 register Py_ssize_t len;
1272 register unsigned char *p;
1273 register long x;
Christian Heimes44720832008-05-26 13:01:01 +00001274
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001275#ifdef Py_DEBUG
Benjamin Peterson26da9202012-02-21 11:08:50 -05001276 assert(_Py_HashSecret_Initialized);
Benjamin Petersonf51c3842012-04-09 14:53:07 -04001277#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001278 if (a->ob_shash != -1)
1279 return a->ob_shash;
1280 len = Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001281 /*
1282 We make the hash of the empty string be 0, rather than using
1283 (prefix ^ suffix), since this slightly obfuscates the hash secret
1284 */
1285 if (len == 0) {
1286 a->ob_shash = 0;
1287 return 0;
1288 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001289 p = (unsigned char *) a->ob_sval;
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001290 x = _Py_HashSecret.prefix;
1291 x ^= *p << 7;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001292 while (--len >= 0)
1293 x = (1000003*x) ^ *p++;
1294 x ^= Py_SIZE(a);
Barry Warsaw1e13eb02012-02-20 20:42:21 -05001295 x ^= _Py_HashSecret.suffix;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001296 if (x == -1)
1297 x = -2;
1298 a->ob_shash = x;
1299 return x;
Christian Heimes44720832008-05-26 13:01:01 +00001300}
1301
1302static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001303string_subscript(PyStringObject* self, PyObject* item)
Christian Heimes44720832008-05-26 13:01:01 +00001304{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001305 if (PyIndex_Check(item)) {
1306 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1307 if (i == -1 && PyErr_Occurred())
1308 return NULL;
1309 if (i < 0)
1310 i += PyString_GET_SIZE(self);
1311 return string_item(self, i);
1312 }
1313 else if (PySlice_Check(item)) {
1314 Py_ssize_t start, stop, step, slicelength, cur, i;
1315 char* source_buf;
1316 char* result_buf;
1317 PyObject* result;
Christian Heimes44720832008-05-26 13:01:01 +00001318
Serhiy Storchaka5e793212017-04-15 20:11:12 +03001319 if (_PySlice_Unpack(item, &start, &stop, &step) < 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001320 return NULL;
1321 }
Serhiy Storchakae41390a2017-04-08 11:48:57 +03001322 slicelength = _PySlice_AdjustIndices(PyString_GET_SIZE(self), &start,
1323 &stop, step);
Christian Heimes44720832008-05-26 13:01:01 +00001324
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001325 if (slicelength <= 0) {
1326 return PyString_FromStringAndSize("", 0);
1327 }
1328 else if (start == 0 && step == 1 &&
1329 slicelength == PyString_GET_SIZE(self) &&
1330 PyString_CheckExact(self)) {
1331 Py_INCREF(self);
1332 return (PyObject *)self;
1333 }
1334 else if (step == 1) {
1335 return PyString_FromStringAndSize(
1336 PyString_AS_STRING(self) + start,
1337 slicelength);
1338 }
1339 else {
1340 source_buf = PyString_AsString((PyObject*)self);
1341 result_buf = (char *)PyMem_Malloc(slicelength);
1342 if (result_buf == NULL)
1343 return PyErr_NoMemory();
Christian Heimes44720832008-05-26 13:01:01 +00001344
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001345 for (cur = start, i = 0; i < slicelength;
1346 cur += step, i++) {
1347 result_buf[i] = source_buf[cur];
1348 }
Christian Heimes44720832008-05-26 13:01:01 +00001349
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001350 result = PyString_FromStringAndSize(result_buf,
1351 slicelength);
1352 PyMem_Free(result_buf);
1353 return result;
1354 }
1355 }
1356 else {
1357 PyErr_Format(PyExc_TypeError,
1358 "string indices must be integers, not %.200s",
1359 Py_TYPE(item)->tp_name);
1360 return NULL;
1361 }
Christian Heimes44720832008-05-26 13:01:01 +00001362}
1363
1364static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001365string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001366{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001367 if ( index != 0 ) {
1368 PyErr_SetString(PyExc_SystemError,
1369 "accessing non-existent string segment");
1370 return -1;
1371 }
1372 *ptr = (void *)self->ob_sval;
1373 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001374}
1375
1376static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001377string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001378{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001379 PyErr_SetString(PyExc_TypeError,
1380 "Cannot use string as modifiable buffer");
1381 return -1;
Christian Heimes44720832008-05-26 13:01:01 +00001382}
1383
1384static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001385string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Christian Heimes44720832008-05-26 13:01:01 +00001386{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001387 if ( lenp )
1388 *lenp = Py_SIZE(self);
1389 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00001390}
1391
1392static Py_ssize_t
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001393string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Christian Heimes44720832008-05-26 13:01:01 +00001394{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001395 if ( index != 0 ) {
1396 PyErr_SetString(PyExc_SystemError,
1397 "accessing non-existent string segment");
1398 return -1;
1399 }
1400 *ptr = self->ob_sval;
1401 return Py_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00001402}
1403
1404static int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001405string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
Christian Heimes44720832008-05-26 13:01:01 +00001406{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001407 return PyBuffer_FillInfo(view, (PyObject*)self,
1408 (void *)self->ob_sval, Py_SIZE(self),
1409 1, flags);
Christian Heimes44720832008-05-26 13:01:01 +00001410}
1411
1412static PySequenceMethods string_as_sequence = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001413 (lenfunc)string_length, /*sq_length*/
1414 (binaryfunc)string_concat, /*sq_concat*/
1415 (ssizeargfunc)string_repeat, /*sq_repeat*/
1416 (ssizeargfunc)string_item, /*sq_item*/
1417 (ssizessizeargfunc)string_slice, /*sq_slice*/
1418 0, /*sq_ass_item*/
1419 0, /*sq_ass_slice*/
1420 (objobjproc)string_contains /*sq_contains*/
Christian Heimes44720832008-05-26 13:01:01 +00001421};
1422
1423static PyMappingMethods string_as_mapping = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001424 (lenfunc)string_length,
1425 (binaryfunc)string_subscript,
1426 0,
Christian Heimes44720832008-05-26 13:01:01 +00001427};
1428
1429static PyBufferProcs string_as_buffer = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001430 (readbufferproc)string_buffer_getreadbuf,
1431 (writebufferproc)string_buffer_getwritebuf,
1432 (segcountproc)string_buffer_getsegcount,
1433 (charbufferproc)string_buffer_getcharbuf,
1434 (getbufferproc)string_buffer_getbuffer,
1435 0, /* XXX */
Christian Heimes44720832008-05-26 13:01:01 +00001436};
1437
1438
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00001439
/* Selectors for the strip variants.  Each value is also the index of
   the matching entry in stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings of the form "|O:<method name>", indexed by the
   selectors above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Bare method name for selector i: the format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1448
Christian Heimes1a6387e2008-03-26 12:49:49 +00001449PyDoc_STRVAR(split__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001450"S.split([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001451\n\
Christian Heimes44720832008-05-26 13:01:01 +00001452Return a list of the words in the string S, using sep as the\n\
1453delimiter string. If maxsplit is given, at most maxsplit\n\
1454splits are done. If sep is not specified or is None, any\n\
1455whitespace string is a separator and empty strings are removed\n\
1456from the result.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001457
1458static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001459string_split(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001460{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001461 Py_ssize_t len = PyString_GET_SIZE(self), n;
1462 Py_ssize_t maxsplit = -1;
1463 const char *s = PyString_AS_STRING(self), *sub;
1464 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001465
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001466 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1467 return NULL;
1468 if (maxsplit < 0)
1469 maxsplit = PY_SSIZE_T_MAX;
1470 if (subobj == Py_None)
1471 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1472 if (PyString_Check(subobj)) {
1473 sub = PyString_AS_STRING(subobj);
1474 n = PyString_GET_SIZE(subobj);
1475 }
Christian Heimes44720832008-05-26 13:01:01 +00001476#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001477 else if (PyUnicode_Check(subobj))
1478 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001479#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001480 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1481 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001482
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001483 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes1a6387e2008-03-26 12:49:49 +00001484}
1485
1486PyDoc_STRVAR(partition__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001487"S.partition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001488\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001489Search for the separator sep in S, and return the part before it,\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001490the separator itself, and the part after it. If the separator is not\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001491found, return S and two empty strings.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001492
1493static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001494string_partition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001495{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001496 const char *sep;
1497 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001498
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001499 if (PyString_Check(sep_obj)) {
1500 sep = PyString_AS_STRING(sep_obj);
1501 sep_len = PyString_GET_SIZE(sep_obj);
1502 }
Christian Heimes44720832008-05-26 13:01:01 +00001503#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001504 else if (PyUnicode_Check(sep_obj))
1505 return PyUnicode_Partition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001506#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001507 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1508 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001509
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001510 return stringlib_partition(
1511 (PyObject*) self,
1512 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1513 sep_obj, sep, sep_len
1514 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001515}
1516
1517PyDoc_STRVAR(rpartition__doc__,
Ezio Melotti1fafaab2010-01-25 11:24:37 +00001518"S.rpartition(sep) -> (head, sep, tail)\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001519\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001520Search for the separator sep in S, starting at the end of S, and return\n\
Christian Heimes44720832008-05-26 13:01:01 +00001521the part before it, the separator itself, and the part after it. If the\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00001522separator is not found, return two empty strings and S.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001523
1524static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001525string_rpartition(PyStringObject *self, PyObject *sep_obj)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001526{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001527 const char *sep;
1528 Py_ssize_t sep_len;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001529
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001530 if (PyString_Check(sep_obj)) {
1531 sep = PyString_AS_STRING(sep_obj);
1532 sep_len = PyString_GET_SIZE(sep_obj);
1533 }
Christian Heimes44720832008-05-26 13:01:01 +00001534#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001535 else if (PyUnicode_Check(sep_obj))
1536 return PyUnicode_RPartition((PyObject *) self, sep_obj);
Christian Heimes44720832008-05-26 13:01:01 +00001537#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001538 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1539 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001540
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001541 return stringlib_rpartition(
1542 (PyObject*) self,
1543 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1544 sep_obj, sep, sep_len
1545 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00001546}
1547
Christian Heimes1a6387e2008-03-26 12:49:49 +00001548PyDoc_STRVAR(rsplit__doc__,
Christian Heimes44720832008-05-26 13:01:01 +00001549"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00001550\n\
Christian Heimes44720832008-05-26 13:01:01 +00001551Return a list of the words in the string S, using sep as the\n\
1552delimiter string, starting at the end of the string and working\n\
1553to the front. If maxsplit is given, at most maxsplit splits are\n\
1554done. If sep is not specified or is None, any whitespace string\n\
1555is a separator.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00001556
1557static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001558string_rsplit(PyStringObject *self, PyObject *args)
Christian Heimes1a6387e2008-03-26 12:49:49 +00001559{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001560 Py_ssize_t len = PyString_GET_SIZE(self), n;
1561 Py_ssize_t maxsplit = -1;
1562 const char *s = PyString_AS_STRING(self), *sub;
1563 PyObject *subobj = Py_None;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001564
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001565 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1566 return NULL;
1567 if (maxsplit < 0)
1568 maxsplit = PY_SSIZE_T_MAX;
1569 if (subobj == Py_None)
1570 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1571 if (PyString_Check(subobj)) {
1572 sub = PyString_AS_STRING(subobj);
1573 n = PyString_GET_SIZE(subobj);
1574 }
Christian Heimes44720832008-05-26 13:01:01 +00001575#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001576 else if (PyUnicode_Check(subobj))
1577 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001578#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001579 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1580 return NULL;
Christian Heimes1a6387e2008-03-26 12:49:49 +00001581
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001582 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
Christian Heimes44720832008-05-26 13:01:01 +00001583}
1584
1585
1586PyDoc_STRVAR(join__doc__,
Georg Brandl9b4e5822009-10-14 18:48:32 +00001587"S.join(iterable) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00001588\n\
1589Return a string which is the concatenation of the strings in the\n\
Georg Brandl9b4e5822009-10-14 18:48:32 +00001590iterable. The separator between elements is S.");
Christian Heimes44720832008-05-26 13:01:01 +00001591
1592static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001593string_join(PyStringObject *self, PyObject *orig)
Christian Heimes44720832008-05-26 13:01:01 +00001594{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001595 char *sep = PyString_AS_STRING(self);
1596 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1597 PyObject *res = NULL;
1598 char *p;
1599 Py_ssize_t seqlen = 0;
1600 size_t sz = 0;
1601 Py_ssize_t i;
1602 PyObject *seq, *item;
Christian Heimes44720832008-05-26 13:01:01 +00001603
Benjamin Peterson1643d5c2014-09-28 12:48:46 -04001604 seq = PySequence_Fast(orig, "can only join an iterable");
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001605 if (seq == NULL) {
1606 return NULL;
1607 }
Christian Heimes44720832008-05-26 13:01:01 +00001608
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001609 seqlen = PySequence_Size(seq);
1610 if (seqlen == 0) {
1611 Py_DECREF(seq);
1612 return PyString_FromString("");
1613 }
1614 if (seqlen == 1) {
1615 item = PySequence_Fast_GET_ITEM(seq, 0);
1616 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1617 Py_INCREF(item);
1618 Py_DECREF(seq);
1619 return item;
1620 }
1621 }
Christian Heimes44720832008-05-26 13:01:01 +00001622
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001623 /* There are at least two things to join, or else we have a subclass
1624 * of the builtin types in the sequence.
1625 * Do a pre-pass to figure out the total amount of space we'll
1626 * need (sz), see whether any argument is absurd, and defer to
1627 * the Unicode join if appropriate.
1628 */
1629 for (i = 0; i < seqlen; i++) {
1630 const size_t old_sz = sz;
1631 item = PySequence_Fast_GET_ITEM(seq, i);
1632 if (!PyString_Check(item)){
Christian Heimes44720832008-05-26 13:01:01 +00001633#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001634 if (PyUnicode_Check(item)) {
1635 /* Defer to Unicode join.
Martin Panterb1d867f2016-05-26 05:28:50 +00001636 * CAUTION: There's no guarantee that the
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001637 * original sequence can be iterated over
1638 * again, so we must pass seq here.
1639 */
1640 PyObject *result;
1641 result = PyUnicode_Join((PyObject *)self, seq);
1642 Py_DECREF(seq);
1643 return result;
1644 }
Christian Heimes44720832008-05-26 13:01:01 +00001645#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001646 PyErr_Format(PyExc_TypeError,
1647 "sequence item %zd: expected string,"
1648 " %.80s found",
1649 i, Py_TYPE(item)->tp_name);
1650 Py_DECREF(seq);
1651 return NULL;
1652 }
1653 sz += PyString_GET_SIZE(item);
1654 if (i != 0)
1655 sz += seplen;
1656 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1657 PyErr_SetString(PyExc_OverflowError,
1658 "join() result is too long for a Python string");
1659 Py_DECREF(seq);
1660 return NULL;
1661 }
1662 }
Christian Heimes44720832008-05-26 13:01:01 +00001663
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001664 /* Allocate result space. */
1665 res = PyString_FromStringAndSize((char*)NULL, sz);
1666 if (res == NULL) {
1667 Py_DECREF(seq);
1668 return NULL;
1669 }
Christian Heimes44720832008-05-26 13:01:01 +00001670
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001671 /* Catenate everything. */
1672 p = PyString_AS_STRING(res);
1673 for (i = 0; i < seqlen; ++i) {
1674 size_t n;
1675 item = PySequence_Fast_GET_ITEM(seq, i);
1676 n = PyString_GET_SIZE(item);
1677 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1678 p += n;
1679 if (i < seqlen - 1) {
1680 Py_MEMCPY(p, sep, seplen);
1681 p += seplen;
1682 }
1683 }
Christian Heimes44720832008-05-26 13:01:01 +00001684
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001685 Py_DECREF(seq);
1686 return res;
Christian Heimes44720832008-05-26 13:01:01 +00001687}
1688
1689PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001690_PyString_Join(PyObject *sep, PyObject *x)
Christian Heimes44720832008-05-26 13:01:01 +00001691{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001692 assert(sep != NULL && PyString_Check(sep));
1693 assert(x != NULL);
1694 return string_join((PyStringObject *)sep, x);
Christian Heimes44720832008-05-26 13:01:01 +00001695}
1696
/* Helper macro to fix up start/end slice values in place.
 *
 *   - end greater than len is clamped to len;
 *   - a negative end has len added and is then clamped to 0;
 *   - a negative start has len added and is then clamped to 0.
 *
 * start and end must be modifiable lvalues.  All three arguments are
 * evaluated more than once, so they must be free of side effects.
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. inside an unbraced if/else); invoke it with a
 * trailing semicolon.
 */
#define ADJUST_INDICES(start, end, len)         \
    do {                                        \
        if ((end) > (len))                      \
            (end) = (len);                      \
        else if ((end) < 0) {                   \
            (end) += (len);                     \
            if ((end) < 0)                      \
                (end) = 0;                      \
        }                                       \
        if ((start) < 0) {                      \
            (start) += (len);                   \
            if ((start) < 0)                    \
                (start) = 0;                    \
        }                                       \
    } while (0)
Christian Heimes44720832008-05-26 13:01:01 +00001711
1712Py_LOCAL_INLINE(Py_ssize_t)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001713string_find_internal(PyStringObject *self, PyObject *args, int dir)
Christian Heimes44720832008-05-26 13:01:01 +00001714{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001715 PyObject *subobj;
1716 const char *sub;
1717 Py_ssize_t sub_len;
1718 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00001719
Jesus Cea44e81682011-04-20 16:39:15 +02001720 if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1721 args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001722 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001723
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001724 if (PyString_Check(subobj)) {
1725 sub = PyString_AS_STRING(subobj);
1726 sub_len = PyString_GET_SIZE(subobj);
1727 }
Christian Heimes44720832008-05-26 13:01:01 +00001728#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001729 else if (PyUnicode_Check(subobj))
1730 return PyUnicode_Find(
1731 (PyObject *)self, subobj, start, end, dir);
Christian Heimes44720832008-05-26 13:01:01 +00001732#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001733 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1734 /* XXX - the "expected a character buffer object" is pretty
1735 confusing for a non-expert. remap to something else ? */
1736 return -2;
Christian Heimes44720832008-05-26 13:01:01 +00001737
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001738 if (dir > 0)
1739 return stringlib_find_slice(
1740 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1741 sub, sub_len, start, end);
1742 else
1743 return stringlib_rfind_slice(
1744 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1745 sub, sub_len, start, end);
Christian Heimes44720832008-05-26 13:01:01 +00001746}
1747
1748
1749PyDoc_STRVAR(find__doc__,
1750"S.find(sub [,start [,end]]) -> int\n\
1751\n\
1752Return the lowest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001753such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001754arguments start and end are interpreted as in slice notation.\n\
1755\n\
1756Return -1 on failure.");
1757
1758static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001759string_find(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001760{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001761 Py_ssize_t result = string_find_internal(self, args, +1);
1762 if (result == -2)
1763 return NULL;
1764 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001765}
1766
1767
1768PyDoc_STRVAR(index__doc__,
1769"S.index(sub [,start [,end]]) -> int\n\
1770\n\
1771Like S.find() but raise ValueError when the substring is not found.");
1772
1773static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001774string_index(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001775{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001776 Py_ssize_t result = string_find_internal(self, args, +1);
1777 if (result == -2)
1778 return NULL;
1779 if (result == -1) {
1780 PyErr_SetString(PyExc_ValueError,
1781 "substring not found");
1782 return NULL;
1783 }
1784 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001785}
1786
1787
1788PyDoc_STRVAR(rfind__doc__,
1789"S.rfind(sub [,start [,end]]) -> int\n\
1790\n\
1791Return the highest index in S where substring sub is found,\n\
Senthil Kumaran5e3a19d2011-07-27 23:36:51 +08001792such that sub is contained within S[start:end]. Optional\n\
Christian Heimes44720832008-05-26 13:01:01 +00001793arguments start and end are interpreted as in slice notation.\n\
1794\n\
1795Return -1 on failure.");
1796
1797static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001798string_rfind(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001799{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001800 Py_ssize_t result = string_find_internal(self, args, -1);
1801 if (result == -2)
1802 return NULL;
1803 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001804}
1805
1806
1807PyDoc_STRVAR(rindex__doc__,
1808"S.rindex(sub [,start [,end]]) -> int\n\
1809\n\
1810Like S.rfind() but raise ValueError when the substring is not found.");
1811
1812static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001813string_rindex(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001814{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001815 Py_ssize_t result = string_find_internal(self, args, -1);
1816 if (result == -2)
1817 return NULL;
1818 if (result == -1) {
1819 PyErr_SetString(PyExc_ValueError,
1820 "substring not found");
1821 return NULL;
1822 }
1823 return PyInt_FromSsize_t(result);
Christian Heimes44720832008-05-26 13:01:01 +00001824}
1825
1826
1827Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001828do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
Christian Heimes44720832008-05-26 13:01:01 +00001829{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001830 char *s = PyString_AS_STRING(self);
1831 Py_ssize_t len = PyString_GET_SIZE(self);
1832 char *sep = PyString_AS_STRING(sepobj);
1833 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1834 Py_ssize_t i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001835
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001836 i = 0;
1837 if (striptype != RIGHTSTRIP) {
1838 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1839 i++;
1840 }
1841 }
Christian Heimes44720832008-05-26 13:01:01 +00001842
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001843 j = len;
1844 if (striptype != LEFTSTRIP) {
1845 do {
1846 j--;
1847 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1848 j++;
1849 }
Christian Heimes44720832008-05-26 13:01:01 +00001850
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001851 if (i == 0 && j == len && PyString_CheckExact(self)) {
1852 Py_INCREF(self);
1853 return (PyObject*)self;
1854 }
1855 else
1856 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001857}
1858
1859
1860Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001861do_strip(PyStringObject *self, int striptype)
Christian Heimes44720832008-05-26 13:01:01 +00001862{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001863 char *s = PyString_AS_STRING(self);
1864 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Christian Heimes44720832008-05-26 13:01:01 +00001865
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001866 i = 0;
1867 if (striptype != RIGHTSTRIP) {
1868 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1869 i++;
1870 }
1871 }
Christian Heimes44720832008-05-26 13:01:01 +00001872
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001873 j = len;
1874 if (striptype != LEFTSTRIP) {
1875 do {
1876 j--;
1877 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1878 j++;
1879 }
Christian Heimes44720832008-05-26 13:01:01 +00001880
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001881 if (i == 0 && j == len && PyString_CheckExact(self)) {
1882 Py_INCREF(self);
1883 return (PyObject*)self;
1884 }
1885 else
1886 return PyString_FromStringAndSize(s+i, j-i);
Christian Heimes44720832008-05-26 13:01:01 +00001887}
1888
1889
1890Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001891do_argstrip(PyStringObject *self, int striptype, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001892{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001893 PyObject *sep = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001894
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001895 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1896 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001897
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001898 if (sep != NULL && sep != Py_None) {
1899 if (PyString_Check(sep))
1900 return do_xstrip(self, striptype, sep);
Christian Heimes44720832008-05-26 13:01:01 +00001901#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001902 else if (PyUnicode_Check(sep)) {
1903 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1904 PyObject *res;
1905 if (uniself==NULL)
1906 return NULL;
1907 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1908 striptype, sep);
1909 Py_DECREF(uniself);
1910 return res;
1911 }
Christian Heimes44720832008-05-26 13:01:01 +00001912#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001913 PyErr_Format(PyExc_TypeError,
Christian Heimes44720832008-05-26 13:01:01 +00001914#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001915 "%s arg must be None, str or unicode",
Christian Heimes44720832008-05-26 13:01:01 +00001916#else
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001917 "%s arg must be None or str",
Christian Heimes44720832008-05-26 13:01:01 +00001918#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001919 STRIPNAME(striptype));
1920 return NULL;
1921 }
Christian Heimes44720832008-05-26 13:01:01 +00001922
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001923 return do_strip(self, striptype);
Christian Heimes44720832008-05-26 13:01:01 +00001924}
1925
1926
1927PyDoc_STRVAR(strip__doc__,
1928"S.strip([chars]) -> string or unicode\n\
1929\n\
1930Return a copy of the string S with leading and trailing\n\
1931whitespace removed.\n\
1932If chars is given and not None, remove characters in chars instead.\n\
1933If chars is unicode, S will be converted to unicode before stripping");
1934
1935static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001936string_strip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001937{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001938 if (PyTuple_GET_SIZE(args) == 0)
1939 return do_strip(self, BOTHSTRIP); /* Common case */
1940 else
1941 return do_argstrip(self, BOTHSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001942}
1943
1944
1945PyDoc_STRVAR(lstrip__doc__,
1946"S.lstrip([chars]) -> string or unicode\n\
1947\n\
1948Return a copy of the string S with leading whitespace removed.\n\
1949If chars is given and not None, remove characters in chars instead.\n\
1950If chars is unicode, S will be converted to unicode before stripping");
1951
1952static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001953string_lstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001954{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001955 if (PyTuple_GET_SIZE(args) == 0)
1956 return do_strip(self, LEFTSTRIP); /* Common case */
1957 else
1958 return do_argstrip(self, LEFTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001959}
1960
1961
1962PyDoc_STRVAR(rstrip__doc__,
1963"S.rstrip([chars]) -> string or unicode\n\
1964\n\
1965Return a copy of the string S with trailing whitespace removed.\n\
1966If chars is given and not None, remove characters in chars instead.\n\
1967If chars is unicode, S will be converted to unicode before stripping");
1968
1969static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001970string_rstrip(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00001971{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001972 if (PyTuple_GET_SIZE(args) == 0)
1973 return do_strip(self, RIGHTSTRIP); /* Common case */
1974 else
1975 return do_argstrip(self, RIGHTSTRIP, args);
Christian Heimes44720832008-05-26 13:01:01 +00001976}
1977
1978
1979PyDoc_STRVAR(lower__doc__,
1980"S.lower() -> string\n\
1981\n\
1982Return a copy of the string S converted to lowercase.");
1983
1984/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1985#ifndef _tolower
1986#define _tolower tolower
1987#endif
1988
1989static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00001990string_lower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00001991{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001992 char *s;
1993 Py_ssize_t i, n = PyString_GET_SIZE(self);
1994 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00001995
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001996 newobj = PyString_FromStringAndSize(NULL, n);
1997 if (!newobj)
1998 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00001999
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002000 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002001
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002002 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002003
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002004 for (i = 0; i < n; i++) {
2005 int c = Py_CHARMASK(s[i]);
2006 if (isupper(c))
2007 s[i] = _tolower(c);
2008 }
Christian Heimes44720832008-05-26 13:01:01 +00002009
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002010 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002011}
2012
2013PyDoc_STRVAR(upper__doc__,
2014"S.upper() -> string\n\
2015\n\
2016Return a copy of the string S converted to uppercase.");
2017
2018#ifndef _toupper
2019#define _toupper toupper
2020#endif
2021
2022static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002023string_upper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002024{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002025 char *s;
2026 Py_ssize_t i, n = PyString_GET_SIZE(self);
2027 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002028
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002029 newobj = PyString_FromStringAndSize(NULL, n);
2030 if (!newobj)
2031 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002032
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002033 s = PyString_AS_STRING(newobj);
Christian Heimes44720832008-05-26 13:01:01 +00002034
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002035 Py_MEMCPY(s, PyString_AS_STRING(self), n);
Christian Heimes44720832008-05-26 13:01:01 +00002036
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002037 for (i = 0; i < n; i++) {
2038 int c = Py_CHARMASK(s[i]);
2039 if (islower(c))
2040 s[i] = _toupper(c);
2041 }
Christian Heimes44720832008-05-26 13:01:01 +00002042
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002043 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002044}
2045
2046PyDoc_STRVAR(title__doc__,
2047"S.title() -> string\n\
2048\n\
2049Return a titlecased version of S, i.e. words start with uppercase\n\
2050characters, all remaining cased characters have lowercase.");
2051
2052static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002053string_title(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002054{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002055 char *s = PyString_AS_STRING(self), *s_new;
2056 Py_ssize_t i, n = PyString_GET_SIZE(self);
2057 int previous_is_cased = 0;
2058 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002059
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002060 newobj = PyString_FromStringAndSize(NULL, n);
2061 if (newobj == NULL)
2062 return NULL;
2063 s_new = PyString_AsString(newobj);
2064 for (i = 0; i < n; i++) {
2065 int c = Py_CHARMASK(*s++);
2066 if (islower(c)) {
2067 if (!previous_is_cased)
2068 c = toupper(c);
2069 previous_is_cased = 1;
2070 } else if (isupper(c)) {
2071 if (previous_is_cased)
2072 c = tolower(c);
2073 previous_is_cased = 1;
2074 } else
2075 previous_is_cased = 0;
2076 *s_new++ = c;
2077 }
2078 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002079}
2080
2081PyDoc_STRVAR(capitalize__doc__,
2082"S.capitalize() -> string\n\
2083\n\
2084Return a copy of the string S with only its first character\n\
2085capitalized.");
2086
2087static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002088string_capitalize(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002089{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002090 char *s = PyString_AS_STRING(self), *s_new;
2091 Py_ssize_t i, n = PyString_GET_SIZE(self);
2092 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002093
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002094 newobj = PyString_FromStringAndSize(NULL, n);
2095 if (newobj == NULL)
2096 return NULL;
2097 s_new = PyString_AsString(newobj);
2098 if (0 < n) {
2099 int c = Py_CHARMASK(*s++);
2100 if (islower(c))
2101 *s_new = toupper(c);
2102 else
2103 *s_new = c;
2104 s_new++;
2105 }
2106 for (i = 1; i < n; i++) {
2107 int c = Py_CHARMASK(*s++);
2108 if (isupper(c))
2109 *s_new = tolower(c);
2110 else
2111 *s_new = c;
2112 s_new++;
2113 }
2114 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002115}
2116
2117
2118PyDoc_STRVAR(count__doc__,
2119"S.count(sub[, start[, end]]) -> int\n\
2120\n\
2121Return the number of non-overlapping occurrences of substring sub in\n\
2122string S[start:end]. Optional arguments start and end are interpreted\n\
2123as in slice notation.");
2124
2125static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002126string_count(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002127{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002128 PyObject *sub_obj;
2129 const char *str = PyString_AS_STRING(self), *sub;
2130 Py_ssize_t sub_len;
2131 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Christian Heimes44720832008-05-26 13:01:01 +00002132
Jesus Cea44e81682011-04-20 16:39:15 +02002133 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002134 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002135
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002136 if (PyString_Check(sub_obj)) {
2137 sub = PyString_AS_STRING(sub_obj);
2138 sub_len = PyString_GET_SIZE(sub_obj);
2139 }
Christian Heimes44720832008-05-26 13:01:01 +00002140#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002141 else if (PyUnicode_Check(sub_obj)) {
2142 Py_ssize_t count;
2143 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2144 if (count == -1)
2145 return NULL;
2146 else
2147 return PyInt_FromSsize_t(count);
2148 }
Christian Heimes44720832008-05-26 13:01:01 +00002149#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002150 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2151 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002152
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002153 ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002154
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002155 return PyInt_FromSsize_t(
2156 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2157 );
Christian Heimes44720832008-05-26 13:01:01 +00002158}
2159
2160PyDoc_STRVAR(swapcase__doc__,
2161"S.swapcase() -> string\n\
2162\n\
2163Return a copy of the string S with uppercase characters\n\
2164converted to lowercase and vice versa.");
2165
2166static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002167string_swapcase(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002168{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002169 char *s = PyString_AS_STRING(self), *s_new;
2170 Py_ssize_t i, n = PyString_GET_SIZE(self);
2171 PyObject *newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002172
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002173 newobj = PyString_FromStringAndSize(NULL, n);
2174 if (newobj == NULL)
2175 return NULL;
2176 s_new = PyString_AsString(newobj);
2177 for (i = 0; i < n; i++) {
2178 int c = Py_CHARMASK(*s++);
2179 if (islower(c)) {
2180 *s_new = toupper(c);
2181 }
2182 else if (isupper(c)) {
2183 *s_new = tolower(c);
2184 }
2185 else
2186 *s_new = c;
2187 s_new++;
2188 }
2189 return newobj;
Christian Heimes44720832008-05-26 13:01:01 +00002190}
2191
2192
2193PyDoc_STRVAR(translate__doc__,
2194"S.translate(table [,deletechars]) -> string\n\
2195\n\
2196Return a copy of the string S, where all characters occurring\n\
2197in the optional argument deletechars are removed, and the\n\
2198remaining characters have been mapped through the given\n\
Mark Dickinsoncb9bf1a2011-06-25 11:00:12 +02002199translation table, which must be a string of length 256 or None.\n\
2200If the table argument is None, no translation is applied and\n\
2201the operation simply removes the characters in deletechars.");
Christian Heimes44720832008-05-26 13:01:01 +00002202
2203static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002204string_translate(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002205{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002206 register char *input, *output;
2207 const char *table;
2208 register Py_ssize_t i, c, changed = 0;
2209 PyObject *input_obj = (PyObject*)self;
2210 const char *output_start, *del_table=NULL;
2211 Py_ssize_t inlen, tablen, dellen = 0;
2212 PyObject *result;
2213 int trans_table[256];
2214 PyObject *tableobj, *delobj = NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002215
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002216 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2217 &tableobj, &delobj))
2218 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002219
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002220 if (PyString_Check(tableobj)) {
2221 table = PyString_AS_STRING(tableobj);
2222 tablen = PyString_GET_SIZE(tableobj);
2223 }
2224 else if (tableobj == Py_None) {
2225 table = NULL;
2226 tablen = 256;
2227 }
Christian Heimes44720832008-05-26 13:01:01 +00002228#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002229 else if (PyUnicode_Check(tableobj)) {
2230 /* Unicode .translate() does not support the deletechars
2231 parameter; instead a mapping to None will cause characters
2232 to be deleted. */
2233 if (delobj != NULL) {
2234 PyErr_SetString(PyExc_TypeError,
2235 "deletions are implemented differently for unicode");
2236 return NULL;
2237 }
2238 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2239 }
Christian Heimes44720832008-05-26 13:01:01 +00002240#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002241 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2242 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002243
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002244 if (tablen != 256) {
2245 PyErr_SetString(PyExc_ValueError,
2246 "translation table must be 256 characters long");
2247 return NULL;
2248 }
Christian Heimes44720832008-05-26 13:01:01 +00002249
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002250 if (delobj != NULL) {
2251 if (PyString_Check(delobj)) {
2252 del_table = PyString_AS_STRING(delobj);
2253 dellen = PyString_GET_SIZE(delobj);
2254 }
Christian Heimes44720832008-05-26 13:01:01 +00002255#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002256 else if (PyUnicode_Check(delobj)) {
2257 PyErr_SetString(PyExc_TypeError,
2258 "deletions are implemented differently for unicode");
2259 return NULL;
2260 }
Christian Heimes44720832008-05-26 13:01:01 +00002261#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002262 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2263 return NULL;
2264 }
2265 else {
2266 del_table = NULL;
2267 dellen = 0;
2268 }
Christian Heimes44720832008-05-26 13:01:01 +00002269
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002270 inlen = PyString_GET_SIZE(input_obj);
2271 result = PyString_FromStringAndSize((char *)NULL, inlen);
2272 if (result == NULL)
2273 return NULL;
2274 output_start = output = PyString_AsString(result);
2275 input = PyString_AS_STRING(input_obj);
Christian Heimes44720832008-05-26 13:01:01 +00002276
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002277 if (dellen == 0 && table != NULL) {
2278 /* If no deletions are required, use faster code */
2279 for (i = inlen; --i >= 0; ) {
2280 c = Py_CHARMASK(*input++);
2281 if (Py_CHARMASK((*output++ = table[c])) != c)
2282 changed = 1;
2283 }
2284 if (changed || !PyString_CheckExact(input_obj))
2285 return result;
2286 Py_DECREF(result);
2287 Py_INCREF(input_obj);
2288 return input_obj;
2289 }
Christian Heimes44720832008-05-26 13:01:01 +00002290
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002291 if (table == NULL) {
2292 for (i = 0; i < 256; i++)
2293 trans_table[i] = Py_CHARMASK(i);
2294 } else {
2295 for (i = 0; i < 256; i++)
2296 trans_table[i] = Py_CHARMASK(table[i]);
2297 }
Christian Heimes44720832008-05-26 13:01:01 +00002298
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002299 for (i = 0; i < dellen; i++)
2300 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
Christian Heimes44720832008-05-26 13:01:01 +00002301
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002302 for (i = inlen; --i >= 0; ) {
2303 c = Py_CHARMASK(*input++);
2304 if (trans_table[c] != -1)
2305 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2306 continue;
2307 changed = 1;
2308 }
2309 if (!changed && PyString_CheckExact(input_obj)) {
2310 Py_DECREF(result);
2311 Py_INCREF(input_obj);
2312 return input_obj;
2313 }
2314 /* Fix the size of the resulting string */
2315 if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2316 return NULL;
2317 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002318}
2319
2320
Christian Heimes44720832008-05-26 13:01:01 +00002321/* find and count characters and substrings */
2322
/* Locate the first byte equal to `c` among the first `target_len` bytes of
   `target`.  Evaluates to a char * at the match, or NULL if there is none. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2325
2326/* String ops must return a string. */
2327/* If the object is subclass of string, create a copy */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002328Py_LOCAL(PyStringObject *)
2329return_self(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00002330{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002331 if (PyString_CheckExact(self)) {
2332 Py_INCREF(self);
2333 return self;
2334 }
2335 return (PyStringObject *)PyString_FromStringAndSize(
2336 PyString_AS_STRING(self),
2337 PyString_GET_SIZE(self));
Christian Heimes44720832008-05-26 13:01:01 +00002338}
2339
2340Py_LOCAL_INLINE(Py_ssize_t)
Ronald Oussoren3687e802013-07-11 13:33:55 +02002341countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002342{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002343 Py_ssize_t count=0;
2344 const char *start=target;
2345 const char *end=target+target_len;
Christian Heimes44720832008-05-26 13:01:01 +00002346
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002347 while ( (start=findchar(start, end-start, c)) != NULL ) {
2348 count++;
2349 if (count >= maxcount)
2350 break;
2351 start += 1;
2352 }
2353 return count;
Christian Heimes44720832008-05-26 13:01:01 +00002354}
2355
Christian Heimes44720832008-05-26 13:01:01 +00002356
2357/* Algorithms for different cases of string replacement */
2358
2359/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002360Py_LOCAL(PyStringObject *)
2361replace_interleave(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002362 const char *to_s, Py_ssize_t to_len,
2363 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002364{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002365 char *self_s, *result_s;
2366 Py_ssize_t self_len, result_len;
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002367 Py_ssize_t count, i;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002368 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002369
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002370 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002371
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002372 /* 1 at the end plus 1 after every character;
2373 count = min(maxcount, self_len + 1) */
2374 if (maxcount <= self_len) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002375 count = maxcount;
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002376 }
2377 else {
2378 /* Can't overflow: self_len + 1 <= maxcount <= PY_SSIZE_T_MAX. */
2379 count = self_len + 1;
2380 }
Christian Heimes44720832008-05-26 13:01:01 +00002381
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002382 /* Check for overflow */
2383 /* result_len = count * to_len + self_len; */
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002384 assert(count > 0);
2385 if (to_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002386 PyErr_SetString(PyExc_OverflowError,
2387 "replace string is too long");
2388 return NULL;
2389 }
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002390 result_len = count * to_len + self_len;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002391 if (! (result = (PyStringObject *)
2392 PyString_FromStringAndSize(NULL, result_len)) )
2393 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002394
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002395 self_s = PyString_AS_STRING(self);
2396 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002397
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002398 /* TODO: special case single character, which doesn't need memcpy */
Christian Heimes44720832008-05-26 13:01:01 +00002399
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002400 /* Lay the first one down (guaranteed this will occur) */
2401 Py_MEMCPY(result_s, to_s, to_len);
2402 result_s += to_len;
2403 count -= 1;
Christian Heimes44720832008-05-26 13:01:01 +00002404
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002405 for (i=0; i<count; i++) {
2406 *result_s++ = *self_s++;
2407 Py_MEMCPY(result_s, to_s, to_len);
2408 result_s += to_len;
2409 }
2410
2411 /* Copy the rest of the original string */
2412 Py_MEMCPY(result_s, self_s, self_len-i);
2413
2414 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002415}
2416
2417/* Special case for deleting a single character */
2418/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002419Py_LOCAL(PyStringObject *)
2420replace_delete_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002421 char from_c, Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002422{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002423 char *self_s, *result_s;
2424 char *start, *next, *end;
2425 Py_ssize_t self_len, result_len;
2426 Py_ssize_t count;
2427 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002428
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002429 self_len = PyString_GET_SIZE(self);
2430 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002431
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002432 count = countchar(self_s, self_len, from_c, maxcount);
2433 if (count == 0) {
2434 return return_self(self);
2435 }
Christian Heimes44720832008-05-26 13:01:01 +00002436
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002437 result_len = self_len - count; /* from_len == 1 */
2438 assert(result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002439
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002440 if ( (result = (PyStringObject *)
2441 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2442 return NULL;
2443 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002444
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002445 start = self_s;
2446 end = self_s + self_len;
2447 while (count-- > 0) {
2448 next = findchar(start, end-start, from_c);
2449 if (next == NULL)
2450 break;
2451 Py_MEMCPY(result_s, start, next-start);
2452 result_s += (next-start);
2453 start = next+1;
2454 }
2455 Py_MEMCPY(result_s, start, end-start);
2456
2457 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002458}
2459
2460/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2461
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002462Py_LOCAL(PyStringObject *)
2463replace_delete_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002464 const char *from_s, Py_ssize_t from_len,
2465 Py_ssize_t maxcount) {
2466 char *self_s, *result_s;
2467 char *start, *next, *end;
2468 Py_ssize_t self_len, result_len;
2469 Py_ssize_t count, offset;
2470 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002471
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002472 self_len = PyString_GET_SIZE(self);
2473 self_s = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002474
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002475 count = stringlib_count(self_s, self_len,
2476 from_s, from_len,
2477 maxcount);
Christian Heimes44720832008-05-26 13:01:01 +00002478
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002479 if (count == 0) {
2480 /* no matches */
2481 return return_self(self);
2482 }
Christian Heimes44720832008-05-26 13:01:01 +00002483
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002484 result_len = self_len - (count * from_len);
2485 assert (result_len>=0);
Christian Heimes44720832008-05-26 13:01:01 +00002486
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002487 if ( (result = (PyStringObject *)
2488 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2489 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002490
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002491 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002492
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002493 start = self_s;
2494 end = self_s + self_len;
2495 while (count-- > 0) {
2496 offset = stringlib_find(start, end-start,
2497 from_s, from_len,
2498 0);
2499 if (offset == -1)
2500 break;
2501 next = start + offset;
Christian Heimes44720832008-05-26 13:01:01 +00002502
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002503 Py_MEMCPY(result_s, start, next-start);
Christian Heimes44720832008-05-26 13:01:01 +00002504
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002505 result_s += (next-start);
2506 start = next+from_len;
2507 }
2508 Py_MEMCPY(result_s, start, end-start);
2509 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002510}
2511
2512/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002513Py_LOCAL(PyStringObject *)
2514replace_single_character_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002515 char from_c, char to_c,
2516 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002517{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002518 char *self_s, *result_s, *start, *end, *next;
2519 Py_ssize_t self_len;
2520 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002521
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002522 /* The result string will be the same size */
2523 self_s = PyString_AS_STRING(self);
2524 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002525
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002526 next = findchar(self_s, self_len, from_c);
Christian Heimes44720832008-05-26 13:01:01 +00002527
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002528 if (next == NULL) {
2529 /* No matches; return the original string */
2530 return return_self(self);
2531 }
Christian Heimes44720832008-05-26 13:01:01 +00002532
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002533 /* Need to make a new string */
2534 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2535 if (result == NULL)
2536 return NULL;
2537 result_s = PyString_AS_STRING(result);
2538 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002539
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002540 /* change everything in-place, starting with this one */
2541 start = result_s + (next-self_s);
2542 *start = to_c;
2543 start++;
2544 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002545
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002546 while (--maxcount > 0) {
2547 next = findchar(start, end-start, from_c);
2548 if (next == NULL)
2549 break;
2550 *next = to_c;
2551 start = next+1;
2552 }
Christian Heimes44720832008-05-26 13:01:01 +00002553
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002554 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002555}
2556
2557/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002558Py_LOCAL(PyStringObject *)
2559replace_substring_in_place(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002560 const char *from_s, Py_ssize_t from_len,
2561 const char *to_s, Py_ssize_t to_len,
2562 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002563{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002564 char *result_s, *start, *end;
2565 char *self_s;
2566 Py_ssize_t self_len, offset;
2567 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002568
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002569 /* The result string will be the same size */
Christian Heimes44720832008-05-26 13:01:01 +00002570
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002571 self_s = PyString_AS_STRING(self);
2572 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002573
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002574 offset = stringlib_find(self_s, self_len,
2575 from_s, from_len,
2576 0);
2577 if (offset == -1) {
2578 /* No matches; return the original string */
2579 return return_self(self);
2580 }
Christian Heimes44720832008-05-26 13:01:01 +00002581
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002582 /* Need to make a new string */
2583 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2584 if (result == NULL)
2585 return NULL;
2586 result_s = PyString_AS_STRING(result);
2587 Py_MEMCPY(result_s, self_s, self_len);
Christian Heimes44720832008-05-26 13:01:01 +00002588
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002589 /* change everything in-place, starting with this one */
2590 start = result_s + offset;
2591 Py_MEMCPY(start, to_s, from_len);
2592 start += from_len;
2593 end = result_s + self_len;
Christian Heimes44720832008-05-26 13:01:01 +00002594
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002595 while ( --maxcount > 0) {
2596 offset = stringlib_find(start, end-start,
2597 from_s, from_len,
2598 0);
2599 if (offset==-1)
2600 break;
2601 Py_MEMCPY(start+offset, to_s, from_len);
2602 start += offset+from_len;
2603 }
Christian Heimes44720832008-05-26 13:01:01 +00002604
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002605 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002606}
2607
2608/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002609Py_LOCAL(PyStringObject *)
2610replace_single_character(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002611 char from_c,
2612 const char *to_s, Py_ssize_t to_len,
2613 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002614{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002615 char *self_s, *result_s;
2616 char *start, *next, *end;
2617 Py_ssize_t self_len, result_len;
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002618 Py_ssize_t count;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002619 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002620
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002621 self_s = PyString_AS_STRING(self);
2622 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002623
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002624 count = countchar(self_s, self_len, from_c, maxcount);
2625 if (count == 0) {
2626 /* no matches, return unchanged */
2627 return return_self(self);
2628 }
Christian Heimes44720832008-05-26 13:01:01 +00002629
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002630 /* use the difference between current and new, hence the "-1" */
2631 /* result_len = self_len + count * (to_len-1) */
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002632 assert(count > 0);
2633 if (to_len - 1 > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002634 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2635 return NULL;
2636 }
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002637 result_len = self_len + count * (to_len - 1);
Christian Heimes44720832008-05-26 13:01:01 +00002638
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002639 if ( (result = (PyStringObject *)
2640 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2641 return NULL;
2642 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002643
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002644 start = self_s;
2645 end = self_s + self_len;
2646 while (count-- > 0) {
2647 next = findchar(start, end-start, from_c);
2648 if (next == NULL)
2649 break;
Christian Heimes44720832008-05-26 13:01:01 +00002650
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002651 if (next == start) {
2652 /* replace with the 'to' */
2653 Py_MEMCPY(result_s, to_s, to_len);
2654 result_s += to_len;
2655 start += 1;
2656 } else {
2657 /* copy the unchanged old then the 'to' */
2658 Py_MEMCPY(result_s, start, next-start);
2659 result_s += (next-start);
2660 Py_MEMCPY(result_s, to_s, to_len);
2661 result_s += to_len;
2662 start = next+1;
2663 }
2664 }
2665 /* Copy the remainder of the remaining string */
2666 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002667
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002668 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002669}
2670
2671/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002672Py_LOCAL(PyStringObject *)
2673replace_substring(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002674 const char *from_s, Py_ssize_t from_len,
2675 const char *to_s, Py_ssize_t to_len,
2676 Py_ssize_t maxcount) {
2677 char *self_s, *result_s;
2678 char *start, *next, *end;
2679 Py_ssize_t self_len, result_len;
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002680 Py_ssize_t count, offset;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002681 PyStringObject *result;
Christian Heimes44720832008-05-26 13:01:01 +00002682
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002683 self_s = PyString_AS_STRING(self);
2684 self_len = PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00002685
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002686 count = stringlib_count(self_s, self_len,
2687 from_s, from_len,
2688 maxcount);
Antoine Pitrou64672132010-01-13 07:55:48 +00002689
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002690 if (count == 0) {
2691 /* no matches, return unchanged */
2692 return return_self(self);
2693 }
Christian Heimes44720832008-05-26 13:01:01 +00002694
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002695 /* Check for overflow */
2696 /* result_len = self_len + count * (to_len-from_len) */
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002697 assert(count > 0);
2698 if (to_len - from_len > (PY_SSIZE_T_MAX - self_len) / count) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002699 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2700 return NULL;
2701 }
Xiang Zhang7bdb5162017-01-09 11:13:20 +08002702 result_len = self_len + count * (to_len - from_len);
Christian Heimes44720832008-05-26 13:01:01 +00002703
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002704 if ( (result = (PyStringObject *)
2705 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2706 return NULL;
2707 result_s = PyString_AS_STRING(result);
Christian Heimes44720832008-05-26 13:01:01 +00002708
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002709 start = self_s;
2710 end = self_s + self_len;
2711 while (count-- > 0) {
2712 offset = stringlib_find(start, end-start,
2713 from_s, from_len,
2714 0);
2715 if (offset == -1)
2716 break;
2717 next = start+offset;
2718 if (next == start) {
2719 /* replace with the 'to' */
2720 Py_MEMCPY(result_s, to_s, to_len);
2721 result_s += to_len;
2722 start += from_len;
2723 } else {
2724 /* copy the unchanged old then the 'to' */
2725 Py_MEMCPY(result_s, start, next-start);
2726 result_s += (next-start);
2727 Py_MEMCPY(result_s, to_s, to_len);
2728 result_s += to_len;
2729 start = next+from_len;
2730 }
2731 }
2732 /* Copy the remainder of the remaining string */
2733 Py_MEMCPY(result_s, start, end-start);
Christian Heimes44720832008-05-26 13:01:01 +00002734
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002735 return result;
Christian Heimes44720832008-05-26 13:01:01 +00002736}
2737
2738
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002739Py_LOCAL(PyStringObject *)
2740replace(PyStringObject *self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002741 const char *from_s, Py_ssize_t from_len,
2742 const char *to_s, Py_ssize_t to_len,
2743 Py_ssize_t maxcount)
Christian Heimes44720832008-05-26 13:01:01 +00002744{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002745 if (maxcount < 0) {
2746 maxcount = PY_SSIZE_T_MAX;
2747 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2748 /* nothing to do; return the original string */
2749 return return_self(self);
2750 }
Christian Heimes44720832008-05-26 13:01:01 +00002751
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002752 if (maxcount == 0 ||
2753 (from_len == 0 && to_len == 0)) {
2754 /* nothing to do; return the original string */
2755 return return_self(self);
2756 }
Christian Heimes44720832008-05-26 13:01:01 +00002757
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002758 /* Handle zero-length special cases */
Christian Heimes44720832008-05-26 13:01:01 +00002759
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002760 if (from_len == 0) {
2761 /* insert the 'to' string everywhere. */
2762 /* >>> "Python".replace("", ".") */
2763 /* '.P.y.t.h.o.n.' */
2764 return replace_interleave(self, to_s, to_len, maxcount);
2765 }
Christian Heimes44720832008-05-26 13:01:01 +00002766
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002767 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2768 /* point for an empty self string to generate a non-empty string */
2769 /* Special case so the remaining code always gets a non-empty string */
2770 if (PyString_GET_SIZE(self) == 0) {
2771 return return_self(self);
2772 }
Christian Heimes44720832008-05-26 13:01:01 +00002773
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002774 if (to_len == 0) {
Martin Panter440bbd02016-09-08 05:22:16 +00002775 /* delete all occurrences of 'from' string */
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002776 if (from_len == 1) {
2777 return replace_delete_single_character(
2778 self, from_s[0], maxcount);
2779 } else {
2780 return replace_delete_substring(self, from_s, from_len, maxcount);
2781 }
2782 }
Christian Heimes44720832008-05-26 13:01:01 +00002783
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002784 /* Handle special case where both strings have the same length */
Christian Heimes44720832008-05-26 13:01:01 +00002785
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002786 if (from_len == to_len) {
2787 if (from_len == 1) {
2788 return replace_single_character_in_place(
2789 self,
2790 from_s[0],
2791 to_s[0],
2792 maxcount);
2793 } else {
2794 return replace_substring_in_place(
2795 self, from_s, from_len, to_s, to_len, maxcount);
2796 }
2797 }
Christian Heimes44720832008-05-26 13:01:01 +00002798
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002799 /* Otherwise use the more generic algorithms */
2800 if (from_len == 1) {
2801 return replace_single_character(self, from_s[0],
2802 to_s, to_len, maxcount);
2803 } else {
2804 /* len('from')>=2, len('to')>=1 */
2805 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2806 }
Christian Heimes44720832008-05-26 13:01:01 +00002807}
2808
2809PyDoc_STRVAR(replace__doc__,
Ezio Melotti2f06b782010-06-26 18:44:42 +00002810"S.replace(old, new[, count]) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00002811\n\
2812Return a copy of string S with all occurrences of substring\n\
2813old replaced by new. If the optional argument count is\n\
2814given, only the first count occurrences are replaced.");
2815
2816static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002817string_replace(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002818{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002819 Py_ssize_t count = -1;
2820 PyObject *from, *to;
2821 const char *from_s, *to_s;
2822 Py_ssize_t from_len, to_len;
Christian Heimes44720832008-05-26 13:01:01 +00002823
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002824 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2825 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002826
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002827 if (PyString_Check(from)) {
2828 from_s = PyString_AS_STRING(from);
2829 from_len = PyString_GET_SIZE(from);
2830 }
Christian Heimes44720832008-05-26 13:01:01 +00002831#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002832 if (PyUnicode_Check(from))
2833 return PyUnicode_Replace((PyObject *)self,
2834 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002835#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002836 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2837 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002838
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002839 if (PyString_Check(to)) {
2840 to_s = PyString_AS_STRING(to);
2841 to_len = PyString_GET_SIZE(to);
2842 }
Christian Heimes44720832008-05-26 13:01:01 +00002843#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002844 else if (PyUnicode_Check(to))
2845 return PyUnicode_Replace((PyObject *)self,
2846 from, to, count);
Christian Heimes44720832008-05-26 13:01:01 +00002847#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002848 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2849 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00002850
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002851 return (PyObject *)replace((PyStringObject *) self,
2852 from_s, from_len,
2853 to_s, to_len, count);
Christian Heimes44720832008-05-26 13:01:01 +00002854}
2855
2856/** End DALKE **/
2857
2858/* Matches the end (direction >= 0) or start (direction < 0) of self
2859 * against substr, using the start and end arguments. Returns
2860 * -1 on error, 0 if not found and 1 if found.
2861 */
2862Py_LOCAL(int)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002863_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002864 Py_ssize_t end, int direction)
Christian Heimes44720832008-05-26 13:01:01 +00002865{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002866 Py_ssize_t len = PyString_GET_SIZE(self);
2867 Py_ssize_t slen;
2868 const char* sub;
2869 const char* str;
Christian Heimes44720832008-05-26 13:01:01 +00002870
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002871 if (PyString_Check(substr)) {
2872 sub = PyString_AS_STRING(substr);
2873 slen = PyString_GET_SIZE(substr);
2874 }
Christian Heimes44720832008-05-26 13:01:01 +00002875#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002876 else if (PyUnicode_Check(substr))
2877 return PyUnicode_Tailmatch((PyObject *)self,
2878 substr, start, end, direction);
Christian Heimes44720832008-05-26 13:01:01 +00002879#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002880 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2881 return -1;
2882 str = PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00002883
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002884 ADJUST_INDICES(start, end, len);
Christian Heimes44720832008-05-26 13:01:01 +00002885
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002886 if (direction < 0) {
2887 /* startswith */
2888 if (start+slen > len)
2889 return 0;
2890 } else {
2891 /* endswith */
2892 if (end-start < slen || start > len)
2893 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002894
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002895 if (end-slen > start)
2896 start = end - slen;
2897 }
2898 if (end-start >= slen)
2899 return ! memcmp(str+start, sub, slen);
2900 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00002901}
2902
2903
2904PyDoc_STRVAR(startswith__doc__,
2905"S.startswith(prefix[, start[, end]]) -> bool\n\
2906\n\
2907Return True if S starts with the specified prefix, False otherwise.\n\
2908With optional start, test S beginning at that position.\n\
2909With optional end, stop comparing S at that position.\n\
2910prefix can also be a tuple of strings to try.");
2911
2912static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002913string_startswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002914{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002915 Py_ssize_t start = 0;
2916 Py_ssize_t end = PY_SSIZE_T_MAX;
2917 PyObject *subobj;
2918 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002919
Jesus Cea44e81682011-04-20 16:39:15 +02002920 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002921 return NULL;
2922 if (PyTuple_Check(subobj)) {
2923 Py_ssize_t i;
2924 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2925 result = _string_tailmatch(self,
2926 PyTuple_GET_ITEM(subobj, i),
2927 start, end, -1);
2928 if (result == -1)
2929 return NULL;
2930 else if (result) {
2931 Py_RETURN_TRUE;
2932 }
2933 }
2934 Py_RETURN_FALSE;
2935 }
2936 result = _string_tailmatch(self, subobj, start, end, -1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002937 if (result == -1) {
2938 if (PyErr_ExceptionMatches(PyExc_TypeError))
2939 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2940 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002941 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002942 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002943 else
2944 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002945}
2946
2947
2948PyDoc_STRVAR(endswith__doc__,
2949"S.endswith(suffix[, start[, end]]) -> bool\n\
2950\n\
2951Return True if S ends with the specified suffix, False otherwise.\n\
2952With optional start, test S beginning at that position.\n\
2953With optional end, stop comparing S at that position.\n\
2954suffix can also be a tuple of strings to try.");
2955
2956static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00002957string_endswith(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00002958{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002959 Py_ssize_t start = 0;
2960 Py_ssize_t end = PY_SSIZE_T_MAX;
2961 PyObject *subobj;
2962 int result;
Christian Heimes44720832008-05-26 13:01:01 +00002963
Jesus Cea44e81682011-04-20 16:39:15 +02002964 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002965 return NULL;
2966 if (PyTuple_Check(subobj)) {
2967 Py_ssize_t i;
2968 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2969 result = _string_tailmatch(self,
2970 PyTuple_GET_ITEM(subobj, i),
2971 start, end, +1);
2972 if (result == -1)
2973 return NULL;
2974 else if (result) {
2975 Py_RETURN_TRUE;
2976 }
2977 }
2978 Py_RETURN_FALSE;
2979 }
2980 result = _string_tailmatch(self, subobj, start, end, +1);
Ezio Melottie3685f62011-04-26 05:12:51 +03002981 if (result == -1) {
2982 if (PyErr_ExceptionMatches(PyExc_TypeError))
2983 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2984 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002985 return NULL;
Ezio Melottie3685f62011-04-26 05:12:51 +03002986 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00002987 else
2988 return PyBool_FromLong(result);
Christian Heimes44720832008-05-26 13:01:01 +00002989}
2990
2991
2992PyDoc_STRVAR(encode__doc__,
2993"S.encode([encoding[,errors]]) -> object\n\
2994\n\
2995Encodes S using the codec registered for encoding. encoding defaults\n\
2996to the default encoding. errors may be given to set a different error\n\
2997handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2998a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2999'xmlcharrefreplace' as well as any other name registered with\n\
3000codecs.register_error that is able to handle UnicodeEncodeErrors.");
3001
3002static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003003string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes44720832008-05-26 13:01:01 +00003004{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003005 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003006 char *encoding = NULL;
3007 char *errors = NULL;
3008 PyObject *v;
3009
Benjamin Peterson332d7212009-09-18 21:14:55 +00003010 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003011 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003012 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003013 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003014 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003015 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003016 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003017 PyErr_Format(PyExc_TypeError,
3018 "encoder did not return a string/unicode object "
3019 "(type=%.400s)",
3020 Py_TYPE(v)->tp_name);
3021 Py_DECREF(v);
3022 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003023 }
3024 return v;
3025
3026 onError:
Christian Heimes1a6387e2008-03-26 12:49:49 +00003027 return NULL;
3028}
3029
Christian Heimes44720832008-05-26 13:01:01 +00003030
3031PyDoc_STRVAR(decode__doc__,
3032"S.decode([encoding[,errors]]) -> object\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003033\n\
Christian Heimes44720832008-05-26 13:01:01 +00003034Decodes S using the codec registered for encoding. encoding defaults\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003035to the default encoding. errors may be given to set a different error\n\
Christian Heimes44720832008-05-26 13:01:01 +00003036handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3037a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003038as well as any other name registered with codecs.register_error that is\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003039able to handle UnicodeDecodeErrors.");
3040
3041static PyObject *
Benjamin Peterson332d7212009-09-18 21:14:55 +00003042string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003043{
Benjamin Peterson332d7212009-09-18 21:14:55 +00003044 static char *kwlist[] = {"encoding", "errors", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003045 char *encoding = NULL;
3046 char *errors = NULL;
3047 PyObject *v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003048
Benjamin Peterson332d7212009-09-18 21:14:55 +00003049 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003050 kwlist, &encoding, &errors))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003051 return NULL;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003052 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Christian Heimes44720832008-05-26 13:01:01 +00003053 if (v == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003054 goto onError;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003055 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003056 PyErr_Format(PyExc_TypeError,
3057 "decoder did not return a string/unicode object "
3058 "(type=%.400s)",
3059 Py_TYPE(v)->tp_name);
3060 Py_DECREF(v);
3061 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003062 }
3063 return v;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003064
Christian Heimes44720832008-05-26 13:01:01 +00003065 onError:
3066 return NULL;
3067}
3068
3069
3070PyDoc_STRVAR(expandtabs__doc__,
3071"S.expandtabs([tabsize]) -> string\n\
3072\n\
3073Return a copy of S where all tab characters are expanded using spaces.\n\
3074If tabsize is not given, a tab size of 8 characters is assumed.");
3075
3076static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003077string_expandtabs(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003078{
3079 const char *e, *p, *qe;
3080 char *q;
3081 Py_ssize_t i, j, incr;
3082 PyObject *u;
3083 int tabsize = 8;
3084
3085 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003086 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003087
3088 /* First pass: determine size of output string */
3089 i = 0; /* chars up to and including most recent \n or \r */
3090 j = 0; /* chars since most recent \n or \r (use in tab calculations) */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003091 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
Benjamin Peterson0e431b92014-03-30 19:16:44 -04003092 for (p = PyString_AS_STRING(self); p < e; p++) {
3093 if (*p == '\t') {
3094 if (tabsize > 0) {
3095 incr = tabsize - (j % tabsize);
3096 if (j > PY_SSIZE_T_MAX - incr)
3097 goto overflow1;
3098 j += incr;
3099 }
Christian Heimes1a6387e2008-03-26 12:49:49 +00003100 }
Benjamin Peterson0e431b92014-03-30 19:16:44 -04003101 else {
3102 if (j > PY_SSIZE_T_MAX - 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003103 goto overflow1;
Benjamin Peterson0e431b92014-03-30 19:16:44 -04003104 j++;
3105 if (*p == '\n' || *p == '\r') {
3106 if (i > PY_SSIZE_T_MAX - j)
3107 goto overflow1;
3108 i += j;
3109 j = 0;
3110 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003111 }
3112 }
Christian Heimes44720832008-05-26 13:01:01 +00003113
3114 if (i > PY_SSIZE_T_MAX - j)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003115 goto overflow1;
Christian Heimes44720832008-05-26 13:01:01 +00003116
3117 /* Second pass: create output string and fill it */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003118 u = PyString_FromStringAndSize(NULL, i + j);
Christian Heimes44720832008-05-26 13:01:01 +00003119 if (!u)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003120 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003121
3122 j = 0; /* same as in first pass */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003123 q = PyString_AS_STRING(u); /* next output char */
3124 qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
Christian Heimes44720832008-05-26 13:01:01 +00003125
Benjamin Peterson8312ecc2014-03-30 19:23:24 -04003126 for (p = PyString_AS_STRING(self); p < e; p++) {
3127 if (*p == '\t') {
3128 if (tabsize > 0) {
3129 i = tabsize - (j % tabsize);
3130 j += i;
3131 while (i--) {
3132 if (q >= qe)
3133 goto overflow2;
3134 *q++ = ' ';
3135 }
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003136 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003137 }
Benjamin Peterson8312ecc2014-03-30 19:23:24 -04003138 else {
3139 if (q >= qe)
3140 goto overflow2;
3141 *q++ = *p;
3142 j++;
3143 if (*p == '\n' || *p == '\r')
3144 j = 0;
3145 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003146 }
Christian Heimes44720832008-05-26 13:01:01 +00003147
3148 return u;
3149
3150 overflow2:
3151 Py_DECREF(u);
3152 overflow1:
3153 PyErr_SetString(PyExc_OverflowError, "new string is too long");
3154 return NULL;
3155}
3156
3157Py_LOCAL_INLINE(PyObject *)
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003158pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Christian Heimes44720832008-05-26 13:01:01 +00003159{
3160 PyObject *u;
3161
3162 if (left < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003163 left = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003164 if (right < 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003165 right = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003166
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003167 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003168 Py_INCREF(self);
3169 return (PyObject *)self;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003170 }
3171
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003172 u = PyString_FromStringAndSize(NULL,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003173 left + PyString_GET_SIZE(self) + right);
Christian Heimes44720832008-05-26 13:01:01 +00003174 if (u) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003175 if (left)
3176 memset(PyString_AS_STRING(u), fill, left);
3177 Py_MEMCPY(PyString_AS_STRING(u) + left,
3178 PyString_AS_STRING(self),
3179 PyString_GET_SIZE(self));
3180 if (right)
3181 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3182 fill, right);
Christian Heimes44720832008-05-26 13:01:01 +00003183 }
3184
3185 return u;
3186}
3187
3188PyDoc_STRVAR(ljust__doc__,
3189"S.ljust(width[, fillchar]) -> string\n"
3190"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003191"Return S left-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003192"done using the specified fill character (default is a space).");
3193
3194static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003195string_ljust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003196{
3197 Py_ssize_t width;
3198 char fillchar = ' ';
3199
3200 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003201 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003202
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003203 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003204 Py_INCREF(self);
3205 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003206 }
3207
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003208 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003209}
3210
3211
3212PyDoc_STRVAR(rjust__doc__,
3213"S.rjust(width[, fillchar]) -> string\n"
3214"\n"
Andrew M. Kuchlingefeb43e2008-10-04 01:05:56 +00003215"Return S right-justified in a string of length width. Padding is\n"
Christian Heimes44720832008-05-26 13:01:01 +00003216"done using the specified fill character (default is a space)");
3217
3218static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003219string_rjust(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003220{
3221 Py_ssize_t width;
3222 char fillchar = ' ';
3223
3224 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003225 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003226
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003227 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003228 Py_INCREF(self);
3229 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003230 }
3231
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003232 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Christian Heimes44720832008-05-26 13:01:01 +00003233}
3234
3235
3236PyDoc_STRVAR(center__doc__,
3237"S.center(width[, fillchar]) -> string\n"
3238"\n"
3239"Return S centered in a string of length width. Padding is\n"
3240"done using the specified fill character (default is a space)");
3241
3242static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003243string_center(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003244{
3245 Py_ssize_t marg, left;
3246 Py_ssize_t width;
3247 char fillchar = ' ';
3248
3249 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003250 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003251
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003252 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003253 Py_INCREF(self);
3254 return (PyObject*) self;
Christian Heimes44720832008-05-26 13:01:01 +00003255 }
3256
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003257 marg = width - PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003258 left = marg / 2 + (marg & width & 1);
3259
3260 return pad(self, left, marg - left, fillchar);
3261}
3262
3263PyDoc_STRVAR(zfill__doc__,
3264"S.zfill(width) -> string\n"
3265"\n"
3266"Pad a numeric string S with zeros on the left, to fill a field\n"
3267"of the specified width. The string S is never truncated.");
3268
3269static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003270string_zfill(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003271{
3272 Py_ssize_t fill;
3273 PyObject *s;
3274 char *p;
3275 Py_ssize_t width;
3276
3277 if (!PyArg_ParseTuple(args, "n:zfill", &width))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003278 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003279
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003280 if (PyString_GET_SIZE(self) >= width) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003281 if (PyString_CheckExact(self)) {
3282 Py_INCREF(self);
3283 return (PyObject*) self;
3284 }
3285 else
3286 return PyString_FromStringAndSize(
Martin Panterca56dd42016-09-17 07:54:55 +00003287 PyString_AS_STRING(self),
3288 PyString_GET_SIZE(self)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003289 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003290 }
3291
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003292 fill = width - PyString_GET_SIZE(self);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003293
Christian Heimes44720832008-05-26 13:01:01 +00003294 s = pad(self, fill, 0, '0');
3295
3296 if (s == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003297 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003298
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003299 p = PyString_AS_STRING(s);
Christian Heimes44720832008-05-26 13:01:01 +00003300 if (p[fill] == '+' || p[fill] == '-') {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003301 /* move sign to beginning of string */
3302 p[0] = p[fill];
3303 p[fill] = '0';
Christian Heimes44720832008-05-26 13:01:01 +00003304 }
3305
3306 return (PyObject*) s;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003307}
3308
Christian Heimes44720832008-05-26 13:01:01 +00003309PyDoc_STRVAR(isspace__doc__,
3310"S.isspace() -> bool\n\
Christian Heimes1a6387e2008-03-26 12:49:49 +00003311\n\
Christian Heimes44720832008-05-26 13:01:01 +00003312Return True if all characters in S are whitespace\n\
3313and there is at least one character in S, False otherwise.");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003314
Christian Heimes44720832008-05-26 13:01:01 +00003315static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003316string_isspace(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003317{
Christian Heimes44720832008-05-26 13:01:01 +00003318 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003319 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003320 register const unsigned char *e;
3321
3322 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003323 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003324 isspace(*p))
3325 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003326
3327 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003328 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003329 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003330
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003331 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003332 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003333 if (!isspace(*p))
3334 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003335 }
Christian Heimes44720832008-05-26 13:01:01 +00003336 return PyBool_FromLong(1);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003337}
3338
Christian Heimes44720832008-05-26 13:01:01 +00003339
3340PyDoc_STRVAR(isalpha__doc__,
3341"S.isalpha() -> bool\n\
3342\n\
3343Return True if all characters in S are alphabetic\n\
3344and there is at least one character in S, False otherwise.");
3345
3346static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003347string_isalpha(PyStringObject *self)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003348{
Christian Heimes44720832008-05-26 13:01:01 +00003349 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003350 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003351 register const unsigned char *e;
Christian Heimes1a6387e2008-03-26 12:49:49 +00003352
Christian Heimes44720832008-05-26 13:01:01 +00003353 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003354 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003355 isalpha(*p))
3356 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003357
3358 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003359 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003360 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003361
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003362 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003363 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003364 if (!isalpha(*p))
3365 return PyBool_FromLong(0);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003366 }
Christian Heimes44720832008-05-26 13:01:01 +00003367 return PyBool_FromLong(1);
3368}
Christian Heimes1a6387e2008-03-26 12:49:49 +00003369
Christian Heimes44720832008-05-26 13:01:01 +00003370
3371PyDoc_STRVAR(isalnum__doc__,
3372"S.isalnum() -> bool\n\
3373\n\
3374Return True if all characters in S are alphanumeric\n\
3375and there is at least one character in S, False otherwise.");
3376
3377static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003378string_isalnum(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003379{
3380 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003381 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003382 register const unsigned char *e;
3383
3384 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003385 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003386 isalnum(*p))
3387 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003388
3389 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003390 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003391 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003392
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003393 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003394 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003395 if (!isalnum(*p))
3396 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003397 }
3398 return PyBool_FromLong(1);
3399}
3400
3401
3402PyDoc_STRVAR(isdigit__doc__,
3403"S.isdigit() -> bool\n\
3404\n\
3405Return True if all characters in S are digits\n\
3406and there is at least one character in S, False otherwise.");
3407
3408static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003409string_isdigit(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003410{
3411 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003412 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003413 register const unsigned char *e;
3414
3415 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003416 if (PyString_GET_SIZE(self) == 1 &&
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003417 isdigit(*p))
3418 return PyBool_FromLong(1);
Christian Heimes44720832008-05-26 13:01:01 +00003419
3420 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003421 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003422 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003423
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003424 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003425 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003426 if (!isdigit(*p))
3427 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003428 }
3429 return PyBool_FromLong(1);
3430}
3431
3432
3433PyDoc_STRVAR(islower__doc__,
3434"S.islower() -> bool\n\
3435\n\
3436Return True if all cased characters in S are lowercase and there is\n\
3437at least one cased character in S, False otherwise.");
3438
3439static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003440string_islower(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003441{
3442 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003443 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003444 register const unsigned char *e;
3445 int cased;
3446
3447 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003448 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003449 return PyBool_FromLong(islower(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003450
3451 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003452 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003453 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003454
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003455 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003456 cased = 0;
3457 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003458 if (isupper(*p))
3459 return PyBool_FromLong(0);
3460 else if (!cased && islower(*p))
3461 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003462 }
3463 return PyBool_FromLong(cased);
3464}
3465
3466
3467PyDoc_STRVAR(isupper__doc__,
3468"S.isupper() -> bool\n\
3469\n\
3470Return True if all cased characters in S are uppercase and there is\n\
3471at least one cased character in S, False otherwise.");
3472
3473static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003474string_isupper(PyStringObject *self)
Christian Heimes44720832008-05-26 13:01:01 +00003475{
3476 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003477 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003478 register const unsigned char *e;
3479 int cased;
3480
3481 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003482 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003483 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003484
3485 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003486 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003487 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003488
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003489 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003490 cased = 0;
3491 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003492 if (islower(*p))
3493 return PyBool_FromLong(0);
3494 else if (!cased && isupper(*p))
3495 cased = 1;
Christian Heimes44720832008-05-26 13:01:01 +00003496 }
3497 return PyBool_FromLong(cased);
3498}
3499
3500
3501PyDoc_STRVAR(istitle__doc__,
3502"S.istitle() -> bool\n\
3503\n\
3504Return True if S is a titlecased string and there is at least one\n\
3505character in S, i.e. uppercase characters may only follow uncased\n\
3506characters and lowercase characters only cased ones. Return False\n\
3507otherwise.");
3508
3509static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003510string_istitle(PyStringObject *self, PyObject *uncased)
Christian Heimes44720832008-05-26 13:01:01 +00003511{
3512 register const unsigned char *p
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003513 = (unsigned char *) PyString_AS_STRING(self);
Christian Heimes44720832008-05-26 13:01:01 +00003514 register const unsigned char *e;
3515 int cased, previous_is_cased;
3516
3517 /* Shortcut for single character strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003518 if (PyString_GET_SIZE(self) == 1)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003519 return PyBool_FromLong(isupper(*p) != 0);
Christian Heimes44720832008-05-26 13:01:01 +00003520
3521 /* Special case for empty strings */
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003522 if (PyString_GET_SIZE(self) == 0)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003523 return PyBool_FromLong(0);
Christian Heimes44720832008-05-26 13:01:01 +00003524
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003525 e = p + PyString_GET_SIZE(self);
Christian Heimes44720832008-05-26 13:01:01 +00003526 cased = 0;
3527 previous_is_cased = 0;
3528 for (; p < e; p++) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003529 register const unsigned char ch = *p;
Christian Heimes44720832008-05-26 13:01:01 +00003530
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003531 if (isupper(ch)) {
3532 if (previous_is_cased)
3533 return PyBool_FromLong(0);
3534 previous_is_cased = 1;
3535 cased = 1;
3536 }
3537 else if (islower(ch)) {
3538 if (!previous_is_cased)
3539 return PyBool_FromLong(0);
3540 previous_is_cased = 1;
3541 cased = 1;
3542 }
3543 else
3544 previous_is_cased = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003545 }
3546 return PyBool_FromLong(cased);
3547}
3548
3549
3550PyDoc_STRVAR(splitlines__doc__,
Raymond Hettingeraad5b022012-06-02 01:42:58 -04003551"S.splitlines(keepends=False) -> list of strings\n\
Christian Heimes44720832008-05-26 13:01:01 +00003552\n\
3553Return a list of the lines in S, breaking at line boundaries.\n\
3554Line breaks are not included in the resulting list unless keepends\n\
3555is given and true.");
3556
3557static PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003558string_splitlines(PyStringObject *self, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00003559{
Christian Heimes44720832008-05-26 13:01:01 +00003560 int keepends = 0;
Christian Heimes44720832008-05-26 13:01:01 +00003561
3562 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003563 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003564
Antoine Pitrou64672132010-01-13 07:55:48 +00003565 return stringlib_splitlines(
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003566 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3567 keepends
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003568 );
Christian Heimes1a6387e2008-03-26 12:49:49 +00003569}
3570
Robert Schuppenies51df0642008-06-01 16:16:17 +00003571PyDoc_STRVAR(sizeof__doc__,
Georg Brandl7a6de8b2008-06-01 16:42:16 +00003572"S.__sizeof__() -> size of S in memory, in bytes");
Robert Schuppenies51df0642008-06-01 16:16:17 +00003573
3574static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003575string_sizeof(PyStringObject *v)
Robert Schuppenies51df0642008-06-01 16:16:17 +00003576{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003577 Py_ssize_t res;
3578 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3579 return PyInt_FromSsize_t(res);
Robert Schuppenies51df0642008-06-01 16:16:17 +00003580}
3581
Christian Heimes1a6387e2008-03-26 12:49:49 +00003582static PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003583string_getnewargs(PyStringObject *v)
Christian Heimes1a6387e2008-03-26 12:49:49 +00003584{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003585 return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
Christian Heimes1a6387e2008-03-26 12:49:49 +00003586}
3587
Christian Heimes1a6387e2008-03-26 12:49:49 +00003588
Christian Heimes44720832008-05-26 13:01:01 +00003589#include "stringlib/string_format.h"
Christian Heimes1a6387e2008-03-26 12:49:49 +00003590
Christian Heimes44720832008-05-26 13:01:01 +00003591PyDoc_STRVAR(format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003592"S.format(*args, **kwargs) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003593\n\
Eric Smith6c840852010-11-06 19:43:44 +00003594Return a formatted version of S, using substitutions from args and kwargs.\n\
3595The substitutions are identified by braces ('{' and '}').");
Christian Heimes1a6387e2008-03-26 12:49:49 +00003596
Eric Smithdc13b792008-05-30 18:10:04 +00003597static PyObject *
3598string__format__(PyObject* self, PyObject* args)
3599{
3600 PyObject *format_spec;
3601 PyObject *result = NULL;
3602 PyObject *tmp = NULL;
3603
3604 /* If 2.x, convert format_spec to the same type as value */
3605 /* This is to allow things like u''.format('') */
3606 if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003607 goto done;
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003608 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003609 PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3610 "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3611 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003612 }
3613 tmp = PyObject_Str(format_spec);
3614 if (tmp == NULL)
Antoine Pitrou619f16e2010-06-09 16:24:00 +00003615 goto done;
Eric Smithdc13b792008-05-30 18:10:04 +00003616 format_spec = tmp;
3617
3618 result = _PyBytes_FormatAdvanced(self,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003619 PyString_AS_STRING(format_spec),
3620 PyString_GET_SIZE(format_spec));
Eric Smithdc13b792008-05-30 18:10:04 +00003621done:
3622 Py_XDECREF(tmp);
3623 return result;
3624}
3625
Christian Heimes44720832008-05-26 13:01:01 +00003626PyDoc_STRVAR(p_format__doc__,
Georg Brandl05f819b2010-07-31 19:07:37 +00003627"S.__format__(format_spec) -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003628\n\
Eric Smith6c840852010-11-06 19:43:44 +00003629Return a formatted version of S as described by format_spec.");
Christian Heimes44720832008-05-26 13:01:01 +00003630
Martin v. Löwisf91d46a2008-08-12 14:49:50 +00003631
Christian Heimes1a6387e2008-03-26 12:49:49 +00003632static PyMethodDef
Christian Heimes44720832008-05-26 13:01:01 +00003633string_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003634 /* Counterparts of the obsolete stropmodule functions; except
3635 string.maketrans(). */
3636 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3637 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3638 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3639 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3640 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3641 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3642 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3643 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3644 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3645 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3646 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3647 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3648 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3649 capitalize__doc__},
3650 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3651 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3652 endswith__doc__},
3653 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3654 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3655 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3656 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3657 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3658 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3659 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3660 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3661 {"rpartition", (PyCFunction)string_rpartition, METH_O,
3662 rpartition__doc__},
3663 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3664 startswith__doc__},
3665 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3666 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3667 swapcase__doc__},
3668 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3669 translate__doc__},
3670 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3671 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3672 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3673 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3674 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3675 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
3676 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3677 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3678 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3679 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
3680 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
3681 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3682 expandtabs__doc__},
3683 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3684 splitlines__doc__},
3685 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3686 sizeof__doc__},
3687 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3688 {NULL, NULL} /* sentinel */
Christian Heimes1a6387e2008-03-26 12:49:49 +00003689};
3690
3691static PyObject *
Christian Heimes44720832008-05-26 13:01:01 +00003692str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
Christian Heimes1a6387e2008-03-26 12:49:49 +00003693
Christian Heimes44720832008-05-26 13:01:01 +00003694static PyObject *
3695string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3696{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003697 PyObject *x = NULL;
3698 static char *kwlist[] = {"object", 0};
Christian Heimes44720832008-05-26 13:01:01 +00003699
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003700 if (type != &PyString_Type)
3701 return str_subtype_new(type, args, kwds);
3702 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3703 return NULL;
3704 if (x == NULL)
3705 return PyString_FromString("");
3706 return PyObject_Str(x);
Christian Heimes44720832008-05-26 13:01:01 +00003707}
3708
3709static PyObject *
3710str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3711{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003712 PyObject *tmp, *pnew;
3713 Py_ssize_t n;
Christian Heimes44720832008-05-26 13:01:01 +00003714
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003715 assert(PyType_IsSubtype(type, &PyString_Type));
3716 tmp = string_new(&PyString_Type, args, kwds);
3717 if (tmp == NULL)
3718 return NULL;
Serhiy Storchaka8d30ad72015-11-25 15:55:54 +02003719 assert(PyString_Check(tmp));
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003720 n = PyString_GET_SIZE(tmp);
3721 pnew = type->tp_alloc(type, n);
3722 if (pnew != NULL) {
3723 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3724 ((PyStringObject *)pnew)->ob_shash =
3725 ((PyStringObject *)tmp)->ob_shash;
3726 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3727 }
3728 Py_DECREF(tmp);
3729 return pnew;
Christian Heimes44720832008-05-26 13:01:01 +00003730}
3731
3732static PyObject *
3733basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3734{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003735 PyErr_SetString(PyExc_TypeError,
3736 "The basestring type cannot be instantiated");
3737 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003738}
3739
3740static PyObject *
3741string_mod(PyObject *v, PyObject *w)
3742{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003743 if (!PyString_Check(v)) {
3744 Py_INCREF(Py_NotImplemented);
3745 return Py_NotImplemented;
3746 }
3747 return PyString_Format(v, w);
Christian Heimes44720832008-05-26 13:01:01 +00003748}
3749
3750PyDoc_STRVAR(basestring_doc,
3751"Type basestring cannot be instantiated; it is the base for str and unicode.");
3752
3753static PyNumberMethods string_as_number = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003754 0, /*nb_add*/
3755 0, /*nb_subtract*/
3756 0, /*nb_multiply*/
3757 0, /*nb_divide*/
3758 string_mod, /*nb_remainder*/
Christian Heimes44720832008-05-26 13:01:01 +00003759};
3760
3761
3762PyTypeObject PyBaseString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003763 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3764 "basestring",
3765 0,
3766 0,
3767 0, /* tp_dealloc */
3768 0, /* tp_print */
3769 0, /* tp_getattr */
3770 0, /* tp_setattr */
3771 0, /* tp_compare */
3772 0, /* tp_repr */
3773 0, /* tp_as_number */
3774 0, /* tp_as_sequence */
3775 0, /* tp_as_mapping */
3776 0, /* tp_hash */
3777 0, /* tp_call */
3778 0, /* tp_str */
3779 0, /* tp_getattro */
3780 0, /* tp_setattro */
3781 0, /* tp_as_buffer */
3782 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3783 basestring_doc, /* tp_doc */
3784 0, /* tp_traverse */
3785 0, /* tp_clear */
3786 0, /* tp_richcompare */
3787 0, /* tp_weaklistoffset */
3788 0, /* tp_iter */
3789 0, /* tp_iternext */
3790 0, /* tp_methods */
3791 0, /* tp_members */
3792 0, /* tp_getset */
3793 &PyBaseObject_Type, /* tp_base */
3794 0, /* tp_dict */
3795 0, /* tp_descr_get */
3796 0, /* tp_descr_set */
3797 0, /* tp_dictoffset */
3798 0, /* tp_init */
3799 0, /* tp_alloc */
3800 basestring_new, /* tp_new */
3801 0, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003802};
3803
3804PyDoc_STRVAR(string_doc,
Chris Jerdonekad4b0002012-10-07 20:37:54 -07003805"str(object='') -> string\n\
Christian Heimes44720832008-05-26 13:01:01 +00003806\n\
3807Return a nice string representation of the object.\n\
3808If the argument is a string, the return value is the same object.");
3809
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003810PyTypeObject PyString_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003811 PyVarObject_HEAD_INIT(&PyType_Type, 0)
3812 "str",
3813 PyStringObject_SIZE,
3814 sizeof(char),
3815 string_dealloc, /* tp_dealloc */
3816 (printfunc)string_print, /* tp_print */
3817 0, /* tp_getattr */
3818 0, /* tp_setattr */
3819 0, /* tp_compare */
3820 string_repr, /* tp_repr */
3821 &string_as_number, /* tp_as_number */
3822 &string_as_sequence, /* tp_as_sequence */
3823 &string_as_mapping, /* tp_as_mapping */
3824 (hashfunc)string_hash, /* tp_hash */
3825 0, /* tp_call */
3826 string_str, /* tp_str */
3827 PyObject_GenericGetAttr, /* tp_getattro */
3828 0, /* tp_setattro */
3829 &string_as_buffer, /* tp_as_buffer */
3830 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
3831 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
3832 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3833 string_doc, /* tp_doc */
3834 0, /* tp_traverse */
3835 0, /* tp_clear */
3836 (richcmpfunc)string_richcompare, /* tp_richcompare */
3837 0, /* tp_weaklistoffset */
3838 0, /* tp_iter */
3839 0, /* tp_iternext */
3840 string_methods, /* tp_methods */
3841 0, /* tp_members */
3842 0, /* tp_getset */
3843 &PyBaseString_Type, /* tp_base */
3844 0, /* tp_dict */
3845 0, /* tp_descr_get */
3846 0, /* tp_descr_set */
3847 0, /* tp_dictoffset */
3848 0, /* tp_init */
3849 0, /* tp_alloc */
3850 string_new, /* tp_new */
3851 PyObject_Del, /* tp_free */
Christian Heimes44720832008-05-26 13:01:01 +00003852};
3853
3854void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003855PyString_Concat(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003856{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003857 register PyObject *v;
3858 if (*pv == NULL)
3859 return;
3860 if (w == NULL || !PyString_Check(*pv)) {
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02003861 Py_CLEAR(*pv);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003862 return;
3863 }
3864 v = string_concat((PyStringObject *) *pv, w);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03003865 Py_SETREF(*pv, v);
Christian Heimes44720832008-05-26 13:01:01 +00003866}
3867
3868void
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003869PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Christian Heimes44720832008-05-26 13:01:01 +00003870{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003871 PyString_Concat(pv, w);
3872 Py_XDECREF(w);
Christian Heimes44720832008-05-26 13:01:01 +00003873}
3874
3875
3876/* The following function breaks the notion that strings are immutable:
3877 it changes the size of a string. We get away with this only if there
3878 is only one module referencing the object. You can also think of it
3879 as creating a new string object and destroying the old one, only
3880 more efficiently. In any case, don't use this if the string may
3881 already be known to some other part of the code...
3882 Note that if there's not enough memory to resize the string, the original
3883 string object at *pv is deallocated, *pv is set to NULL, an "out of
3884 memory" exception is set, and -1 is returned. Else (on success) 0 is
3885 returned, and the value in *pv may or may not be the same as on input.
3886 As always, an extra byte is allocated for a trailing \0 byte (newsize
3887 does *not* include that), and a trailing \0 byte is stored.
3888*/
3889
3890int
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003891_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Christian Heimes44720832008-05-26 13:01:01 +00003892{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003893 register PyObject *v;
3894 register PyStringObject *sv;
3895 v = *pv;
3896 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3897 PyString_CHECK_INTERNED(v)) {
3898 *pv = 0;
3899 Py_DECREF(v);
3900 PyErr_BadInternalCall();
3901 return -1;
3902 }
3903 /* XXX UNREF/NEWREF interface should be more symmetrical */
3904 _Py_DEC_REFTOTAL;
3905 _Py_ForgetReference(v);
3906 *pv = (PyObject *)
3907 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3908 if (*pv == NULL) {
3909 PyObject_Del(v);
3910 PyErr_NoMemory();
3911 return -1;
3912 }
3913 _Py_NewReference(*pv);
3914 sv = (PyStringObject *) *pv;
3915 Py_SIZE(sv) = newsize;
3916 sv->ob_sval[newsize] = '\0';
3917 sv->ob_shash = -1; /* invalidate cached hash value */
3918 return 0;
Christian Heimes44720832008-05-26 13:01:01 +00003919}
3920
3921/* Helpers for formatstring */
3922
3923Py_LOCAL_INLINE(PyObject *)
3924getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3925{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003926 Py_ssize_t argidx = *p_argidx;
3927 if (argidx < arglen) {
3928 (*p_argidx)++;
3929 if (arglen < 0)
3930 return args;
3931 else
3932 return PyTuple_GetItem(args, argidx);
3933 }
3934 PyErr_SetString(PyExc_TypeError,
3935 "not enough arguments for format string");
3936 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00003937}
3938
/* Format flags: a bitmask built from the flag characters that may follow
 * the '%' of a conversion specifier.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
Christian Heimes44720832008-05-26 13:01:01 +00003951
Mark Dickinson18cfada2009-11-23 18:46:41 +00003952/* Returns a new reference to a PyString object, or NULL on failure. */
3953
3954static PyObject *
3955formatfloat(PyObject *v, int flags, int prec, int type)
Christian Heimes44720832008-05-26 13:01:01 +00003956{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003957 char *p;
3958 PyObject *result;
3959 double x;
Eric Smithc1bdf892009-10-26 17:46:17 +00003960
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003961 x = PyFloat_AsDouble(v);
3962 if (x == -1.0 && PyErr_Occurred()) {
3963 PyErr_Format(PyExc_TypeError, "float argument required, "
3964 "not %.200s", Py_TYPE(v)->tp_name);
3965 return NULL;
3966 }
Mark Dickinson18cfada2009-11-23 18:46:41 +00003967
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003968 if (prec < 0)
3969 prec = 6;
Mark Dickinson174e9092009-03-29 16:17:16 +00003970
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003971 p = PyOS_double_to_string(x, type, prec,
3972 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
Christian Heimes44720832008-05-26 13:01:01 +00003973
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003974 if (p == NULL)
3975 return NULL;
3976 result = PyString_FromStringAndSize(p, strlen(p));
3977 PyMem_Free(p);
3978 return result;
Christian Heimes44720832008-05-26 13:01:01 +00003979}
3980
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00003981/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
Christian Heimes44720832008-05-26 13:01:01 +00003982 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3983 * Python's regular ints.
3984 * Return value: a new PyString*, or NULL if error.
3985 * . *pbuf is set to point into it,
3986 * *plen set to the # of chars following that.
3987 * Caller must decref it when done using pbuf.
3988 * The string starting at *pbuf is of the form
3989 * "-"? ("0x" | "0X")? digit+
3990 * "0x"/"0X" are present only for x and X conversions, with F_ALT
3991 * set in flags. The case of hex digits will be correct,
3992 * There will be at least prec digits, zero-filled on the left if
3993 * necessary to get that many.
Antoine Pitrouc83ea132010-05-09 14:46:46 +00003994 * val object to be converted
3995 * flags bitmask of format flags; only F_ALT is looked at
3996 * prec minimum number of digits; 0-fill on left if needed
3997 * type a character in [duoxX]; u acts the same as d
Christian Heimes44720832008-05-26 13:01:01 +00003998 *
3999 * CAUTION: o, x and X conversions on regular ints can never
4000 * produce a '-' sign, but can for Python's unbounded ints.
4001 */
4002PyObject*
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004003_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004004 char **pbuf, int *plen)
Christian Heimes44720832008-05-26 13:01:01 +00004005{
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004006 PyObject *result = NULL, *r1;
4007 const char *s;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004008 char *buf;
4009 Py_ssize_t i;
4010 int sign; /* 1 if '-', else 0 */
4011 int len; /* number of characters */
4012 Py_ssize_t llen;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004013 int numdigits; /* len == numnondigits + skipped + numdigits */
4014 int numnondigits, skipped, filled;
4015 const char *method;
Christian Heimes44720832008-05-26 13:01:01 +00004016
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004017 switch (type) {
4018 case 'd':
4019 case 'u':
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004020 method = "str";
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004021 result = Py_TYPE(val)->tp_str(val);
4022 break;
4023 case 'o':
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004024 method = "oct";
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004025 result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4026 break;
4027 case 'x':
4028 case 'X':
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004029 method = "hex";
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004030 result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4031 break;
4032 default:
4033 assert(!"'type' not in [duoxX]");
4034 }
4035 if (!result)
4036 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004037
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004038 if (PyString_AsStringAndSize(result, (char **)&s, &llen) < 0) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004039 Py_DECREF(result);
4040 return NULL;
4041 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004042 if (llen > INT_MAX) {
4043 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004044 Py_DECREF(result);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004045 return NULL;
4046 }
4047 len = (int)llen;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004048 if (len > 0 && s[len-1] == 'L') {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004049 --len;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004050 if (len == 0)
4051 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004052 }
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004053 sign = s[0] == '-';
4054 numnondigits = sign;
Christian Heimes44720832008-05-26 13:01:01 +00004055
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004056 /* Need to skip 0x, 0X or 0. */
4057 skipped = 0;
4058 switch (type) {
4059 case 'o':
4060 if (s[sign] != '0')
4061 goto error;
4062 /* If 0 is only digit, leave it alone. */
4063 if ((flags & F_ALT) == 0 && len - sign > 1)
4064 skipped = 1;
4065 break;
4066 case 'x':
4067 case 'X':
4068 if (s[sign] != '0' || (s[sign + 1] != 'x' && s[sign + 1] != 'X'))
4069 goto error;
4070 if ((flags & F_ALT) == 0)
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004071 skipped = 2;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004072 else
4073 numnondigits += 2;
4074 break;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004075 }
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004076 numdigits = len - numnondigits - skipped;
4077 if (numdigits <= 0)
4078 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004079
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004080 filled = prec - numdigits;
4081 if (filled < 0)
4082 filled = 0;
4083 len = numnondigits + filled + numdigits;
4084
4085 /* To modify the string in-place, there can only be one reference. */
4086 if (skipped >= filled &&
4087 PyString_CheckExact(result) &&
4088 Py_REFCNT(result) == 1 &&
4089 !PyString_CHECK_INTERNED(result))
4090 {
4091 r1 = NULL;
4092 buf = (char *)s + skipped - filled;
4093 }
4094 else {
4095 r1 = result;
4096 result = PyString_FromStringAndSize(NULL, len);
4097 if (!result) {
4098 Py_DECREF(r1);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004099 return NULL;
4100 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004101 buf = PyString_AS_STRING(result);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004102 }
Christian Heimes44720832008-05-26 13:01:01 +00004103
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004104 for (i = numnondigits; --i >= 0;)
4105 buf[i] = s[i];
4106 buf += numnondigits;
4107 s += numnondigits + skipped;
4108 for (i = 0; i < filled; i++)
4109 *buf++ = '0';
4110 if (r1 == NULL) {
4111 assert(buf == s);
4112 buf += numdigits;
4113 }
4114 else {
4115 for (i = 0; i < numdigits; i++)
4116 *buf++ = *s++;
4117 }
4118 *buf = '\0';
4119 buf -= len;
4120 Py_XDECREF(r1);
4121
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004122 /* Fix up case for hex conversions. */
4123 if (type == 'X') {
4124 /* Need to convert all lower case letters to upper case.
4125 and need to convert 0x to 0X (and -0x to -0X). */
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004126 for (i = 0; i < len; i++) {
4127 if (buf[i] >= 'a' && buf[i] <= 'z')
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004128 buf[i] -= 'a'-'A';
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004129 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004130 }
4131 *pbuf = buf;
4132 *plen = len;
4133 return result;
Serhiy Storchakac30f27d2016-12-01 10:27:11 +02004134
4135error:
4136 PyErr_Format(PyExc_ValueError,
4137 "%%%c format: invalid result of __%s__ (type=%.200s)",
4138 type, method, Py_TYPE(val)->tp_name);
4139 Py_DECREF(result);
4140 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004141}
4142
4143Py_LOCAL_INLINE(int)
4144formatint(char *buf, size_t buflen, int flags,
4145 int prec, int type, PyObject *v)
4146{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004147 /* fmt = '%#.' + `prec` + 'l' + `type`
4148 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4149 + 1 + 1 = 24 */
4150 char fmt[64]; /* plenty big enough! */
4151 char *sign;
4152 long x;
Christian Heimes44720832008-05-26 13:01:01 +00004153
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004154 x = PyInt_AsLong(v);
4155 if (x == -1 && PyErr_Occurred()) {
4156 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4157 Py_TYPE(v)->tp_name);
4158 return -1;
4159 }
4160 if (x < 0 && type == 'u') {
4161 type = 'd';
4162 }
4163 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4164 sign = "-";
4165 else
4166 sign = "";
4167 if (prec < 0)
4168 prec = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004169
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004170 if ((flags & F_ALT) &&
4171 (type == 'x' || type == 'X')) {
4172 /* When converting under %#x or %#X, there are a number
4173 * of issues that cause pain:
4174 * - when 0 is being converted, the C standard leaves off
4175 * the '0x' or '0X', which is inconsistent with other
4176 * %#x/%#X conversions and inconsistent with Python's
4177 * hex() function
4178 * - there are platforms that violate the standard and
4179 * convert 0 with the '0x' or '0X'
4180 * (Metrowerks, Compaq Tru64)
4181 * - there are platforms that give '0x' when converting
4182 * under %#X, but convert 0 in accordance with the
4183 * standard (OS/2 EMX)
4184 *
4185 * We can achieve the desired consistency by inserting our
4186 * own '0x' or '0X' prefix, and substituting %x/%X in place
4187 * of %#x/%#X.
4188 *
4189 * Note that this is the same approach as used in
4190 * formatint() in unicodeobject.c
4191 */
4192 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4193 sign, type, prec, type);
4194 }
4195 else {
4196 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4197 sign, (flags&F_ALT) ? "#" : "",
4198 prec, type);
4199 }
Christian Heimes44720832008-05-26 13:01:01 +00004200
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004201 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4202 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4203 */
4204 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4205 PyErr_SetString(PyExc_OverflowError,
4206 "formatted integer is too long (precision too large?)");
4207 return -1;
4208 }
4209 if (sign[0])
4210 PyOS_snprintf(buf, buflen, fmt, -x);
4211 else
4212 PyOS_snprintf(buf, buflen, fmt, x);
4213 return (int)strlen(buf);
Christian Heimes44720832008-05-26 13:01:01 +00004214}
4215
4216Py_LOCAL_INLINE(int)
4217formatchar(char *buf, size_t buflen, PyObject *v)
4218{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004219 /* presume that the buffer is at least 2 characters long */
4220 if (PyString_Check(v)) {
4221 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4222 return -1;
4223 }
4224 else {
4225 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4226 return -1;
4227 }
4228 buf[1] = '\0';
4229 return 1;
Christian Heimes44720832008-05-26 13:01:01 +00004230}
4231
4232/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4233
Mark Dickinson18cfada2009-11-23 18:46:41 +00004234 FORMATBUFLEN is the length of the buffer in which the ints &
Christian Heimes44720832008-05-26 13:01:01 +00004235 chars are formatted. XXX This is a magic number. Each formatting
4236 routine does bounds checking to ensure no overflow, but a better
4237 solution may be to malloc a buffer of appropriate size for each
4238 format. For now, the current solution is sufficient.
4239*/
/* Parenthesized so the cast cannot bind to neighbouring tokens at the
   point of use (e.g. `sizeof FORMATBUFLEN`). */
#define FORMATBUFLEN ((size_t)120)
4241
4242PyObject *
Gregory P. Smith99a3dce2008-06-10 17:42:36 +00004243PyString_Format(PyObject *format, PyObject *args)
Christian Heimes44720832008-05-26 13:01:01 +00004244{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004245 char *fmt, *res;
4246 Py_ssize_t arglen, argidx;
4247 Py_ssize_t reslen, rescnt, fmtcnt;
4248 int args_owned = 0;
4249 PyObject *result, *orig_args;
Christian Heimes44720832008-05-26 13:01:01 +00004250#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004251 PyObject *v, *w;
Christian Heimes44720832008-05-26 13:01:01 +00004252#endif
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004253 PyObject *dict = NULL;
4254 if (format == NULL || !PyString_Check(format) || args == NULL) {
4255 PyErr_BadInternalCall();
4256 return NULL;
4257 }
4258 orig_args = args;
4259 fmt = PyString_AS_STRING(format);
4260 fmtcnt = PyString_GET_SIZE(format);
4261 reslen = rescnt = fmtcnt + 100;
4262 result = PyString_FromStringAndSize((char *)NULL, reslen);
4263 if (result == NULL)
4264 return NULL;
4265 res = PyString_AsString(result);
4266 if (PyTuple_Check(args)) {
4267 arglen = PyTuple_GET_SIZE(args);
4268 argidx = 0;
4269 }
4270 else {
4271 arglen = -1;
4272 argidx = -2;
4273 }
Benjamin Petersonda2c7eb2013-03-23 22:32:00 -05004274 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4275 !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004276 dict = args;
4277 while (--fmtcnt >= 0) {
4278 if (*fmt != '%') {
4279 if (--rescnt < 0) {
4280 rescnt = fmtcnt + 100;
4281 reslen += rescnt;
4282 if (_PyString_Resize(&result, reslen))
4283 return NULL;
4284 res = PyString_AS_STRING(result)
4285 + reslen - rescnt;
4286 --rescnt;
4287 }
4288 *res++ = *fmt++;
4289 }
4290 else {
4291 /* Got a format specifier */
4292 int flags = 0;
4293 Py_ssize_t width = -1;
4294 int prec = -1;
4295 int c = '\0';
4296 int fill;
4297 int isnumok;
4298 PyObject *v = NULL;
4299 PyObject *temp = NULL;
4300 char *pbuf;
4301 int sign;
4302 Py_ssize_t len;
4303 char formatbuf[FORMATBUFLEN];
4304 /* For format{int,char}() */
Christian Heimes44720832008-05-26 13:01:01 +00004305#ifdef Py_USING_UNICODE
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004306 char *fmt_start = fmt;
4307 Py_ssize_t argidx_start = argidx;
Christian Heimes44720832008-05-26 13:01:01 +00004308#endif
4309
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004310 fmt++;
4311 if (*fmt == '(') {
4312 char *keystart;
4313 Py_ssize_t keylen;
4314 PyObject *key;
4315 int pcount = 1;
Christian Heimes44720832008-05-26 13:01:01 +00004316
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004317 if (dict == NULL) {
4318 PyErr_SetString(PyExc_TypeError,
4319 "format requires a mapping");
4320 goto error;
4321 }
4322 ++fmt;
4323 --fmtcnt;
4324 keystart = fmt;
4325 /* Skip over balanced parentheses */
4326 while (pcount > 0 && --fmtcnt >= 0) {
4327 if (*fmt == ')')
4328 --pcount;
4329 else if (*fmt == '(')
4330 ++pcount;
4331 fmt++;
4332 }
4333 keylen = fmt - keystart - 1;
4334 if (fmtcnt < 0 || pcount > 0) {
4335 PyErr_SetString(PyExc_ValueError,
4336 "incomplete format key");
4337 goto error;
4338 }
4339 key = PyString_FromStringAndSize(keystart,
4340 keylen);
4341 if (key == NULL)
4342 goto error;
4343 if (args_owned) {
4344 Py_DECREF(args);
4345 args_owned = 0;
4346 }
4347 args = PyObject_GetItem(dict, key);
4348 Py_DECREF(key);
4349 if (args == NULL) {
4350 goto error;
4351 }
4352 args_owned = 1;
4353 arglen = -1;
4354 argidx = -2;
4355 }
4356 while (--fmtcnt >= 0) {
4357 switch (c = *fmt++) {
4358 case '-': flags |= F_LJUST; continue;
4359 case '+': flags |= F_SIGN; continue;
4360 case ' ': flags |= F_BLANK; continue;
4361 case '#': flags |= F_ALT; continue;
4362 case '0': flags |= F_ZERO; continue;
4363 }
4364 break;
4365 }
4366 if (c == '*') {
4367 v = getnextarg(args, arglen, &argidx);
4368 if (v == NULL)
4369 goto error;
4370 if (!PyInt_Check(v)) {
4371 PyErr_SetString(PyExc_TypeError,
4372 "* wants int");
4373 goto error;
4374 }
Serhiy Storchaka926f3a32013-01-19 23:35:46 +02004375 width = PyInt_AsSsize_t(v);
4376 if (width == -1 && PyErr_Occurred())
4377 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004378 if (width < 0) {
4379 flags |= F_LJUST;
4380 width = -width;
4381 }
4382 if (--fmtcnt >= 0)
4383 c = *fmt++;
4384 }
4385 else if (c >= 0 && isdigit(c)) {
4386 width = c - '0';
4387 while (--fmtcnt >= 0) {
4388 c = Py_CHARMASK(*fmt++);
4389 if (!isdigit(c))
4390 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004391 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004392 PyErr_SetString(
4393 PyExc_ValueError,
4394 "width too big");
4395 goto error;
4396 }
4397 width = width*10 + (c - '0');
4398 }
4399 }
4400 if (c == '.') {
4401 prec = 0;
4402 if (--fmtcnt >= 0)
4403 c = *fmt++;
4404 if (c == '*') {
4405 v = getnextarg(args, arglen, &argidx);
4406 if (v == NULL)
4407 goto error;
4408 if (!PyInt_Check(v)) {
4409 PyErr_SetString(
4410 PyExc_TypeError,
4411 "* wants int");
4412 goto error;
4413 }
Serhiy Storchaka926f3a32013-01-19 23:35:46 +02004414 prec = _PyInt_AsInt(v);
4415 if (prec == -1 && PyErr_Occurred())
4416 goto error;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004417 if (prec < 0)
4418 prec = 0;
4419 if (--fmtcnt >= 0)
4420 c = *fmt++;
4421 }
4422 else if (c >= 0 && isdigit(c)) {
4423 prec = c - '0';
4424 while (--fmtcnt >= 0) {
4425 c = Py_CHARMASK(*fmt++);
4426 if (!isdigit(c))
4427 break;
Mark Dickinson75d36002012-10-28 10:00:46 +00004428 if (prec > (INT_MAX - ((int)c - '0')) / 10) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004429 PyErr_SetString(
4430 PyExc_ValueError,
4431 "prec too big");
4432 goto error;
Christian Heimes44720832008-05-26 13:01:01 +00004433 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004434 prec = prec*10 + (c - '0');
4435 }
4436 }
4437 } /* prec */
4438 if (fmtcnt >= 0) {
4439 if (c == 'h' || c == 'l' || c == 'L') {
4440 if (--fmtcnt >= 0)
4441 c = *fmt++;
4442 }
4443 }
4444 if (fmtcnt < 0) {
4445 PyErr_SetString(PyExc_ValueError,
4446 "incomplete format");
4447 goto error;
4448 }
4449 if (c != '%') {
4450 v = getnextarg(args, arglen, &argidx);
4451 if (v == NULL)
4452 goto error;
4453 }
4454 sign = 0;
4455 fill = ' ';
4456 switch (c) {
4457 case '%':
4458 pbuf = "%";
4459 len = 1;
4460 break;
4461 case 's':
4462#ifdef Py_USING_UNICODE
4463 if (PyUnicode_Check(v)) {
4464 fmt = fmt_start;
4465 argidx = argidx_start;
4466 goto unicode;
4467 }
4468#endif
4469 temp = _PyObject_Str(v);
4470#ifdef Py_USING_UNICODE
4471 if (temp != NULL && PyUnicode_Check(temp)) {
4472 Py_DECREF(temp);
4473 fmt = fmt_start;
4474 argidx = argidx_start;
4475 goto unicode;
4476 }
4477#endif
4478 /* Fall through */
4479 case 'r':
4480 if (c == 'r')
4481 temp = PyObject_Repr(v);
4482 if (temp == NULL)
4483 goto error;
4484 if (!PyString_Check(temp)) {
4485 PyErr_SetString(PyExc_TypeError,
4486 "%s argument has non-string str()");
4487 Py_DECREF(temp);
4488 goto error;
4489 }
4490 pbuf = PyString_AS_STRING(temp);
4491 len = PyString_GET_SIZE(temp);
4492 if (prec >= 0 && len > prec)
4493 len = prec;
4494 break;
4495 case 'i':
4496 case 'd':
4497 case 'u':
4498 case 'o':
4499 case 'x':
4500 case 'X':
4501 if (c == 'i')
4502 c = 'd';
4503 isnumok = 0;
4504 if (PyNumber_Check(v)) {
4505 PyObject *iobj=NULL;
4506
4507 if (PyInt_Check(v) || (PyLong_Check(v))) {
4508 iobj = v;
4509 Py_INCREF(iobj);
4510 }
4511 else {
4512 iobj = PyNumber_Int(v);
Benjamin Petersona708adf2013-01-02 12:21:32 -06004513 if (iobj==NULL) {
Benjamin Peterson8f53ded2013-01-02 12:25:15 -06004514 PyErr_Clear();
4515 iobj = PyNumber_Long(v);
4516 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004517 }
4518 if (iobj!=NULL) {
4519 if (PyInt_Check(iobj)) {
4520 isnumok = 1;
4521 pbuf = formatbuf;
4522 len = formatint(pbuf,
4523 sizeof(formatbuf),
4524 flags, prec, c, iobj);
4525 Py_DECREF(iobj);
4526 if (len < 0)
4527 goto error;
4528 sign = 1;
4529 }
4530 else if (PyLong_Check(iobj)) {
4531 int ilen;
4532
4533 isnumok = 1;
4534 temp = _PyString_FormatLong(iobj, flags,
4535 prec, c, &pbuf, &ilen);
4536 Py_DECREF(iobj);
4537 len = ilen;
4538 if (!temp)
4539 goto error;
4540 sign = 1;
4541 }
4542 else {
4543 Py_DECREF(iobj);
4544 }
4545 }
4546 }
4547 if (!isnumok) {
4548 PyErr_Format(PyExc_TypeError,
4549 "%%%c format: a number is required, "
4550 "not %.200s", c, Py_TYPE(v)->tp_name);
4551 goto error;
4552 }
4553 if (flags & F_ZERO)
4554 fill = '0';
4555 break;
4556 case 'e':
4557 case 'E':
4558 case 'f':
4559 case 'F':
4560 case 'g':
4561 case 'G':
4562 temp = formatfloat(v, flags, prec, c);
4563 if (temp == NULL)
4564 goto error;
4565 pbuf = PyString_AS_STRING(temp);
4566 len = PyString_GET_SIZE(temp);
4567 sign = 1;
4568 if (flags & F_ZERO)
4569 fill = '0';
4570 break;
4571 case 'c':
4572#ifdef Py_USING_UNICODE
4573 if (PyUnicode_Check(v)) {
4574 fmt = fmt_start;
4575 argidx = argidx_start;
4576 goto unicode;
4577 }
4578#endif
4579 pbuf = formatbuf;
4580 len = formatchar(pbuf, sizeof(formatbuf), v);
4581 if (len < 0)
4582 goto error;
4583 break;
4584 default:
4585 PyErr_Format(PyExc_ValueError,
4586 "unsupported format character '%c' (0x%x) "
4587 "at index %zd",
4588 c, c,
4589 (Py_ssize_t)(fmt - 1 -
4590 PyString_AsString(format)));
4591 goto error;
4592 }
4593 if (sign) {
4594 if (*pbuf == '-' || *pbuf == '+') {
4595 sign = *pbuf++;
4596 len--;
4597 }
4598 else if (flags & F_SIGN)
4599 sign = '+';
4600 else if (flags & F_BLANK)
4601 sign = ' ';
4602 else
4603 sign = 0;
4604 }
4605 if (width < len)
4606 width = len;
4607 if (rescnt - (sign != 0) < width) {
4608 reslen -= rescnt;
4609 rescnt = width + fmtcnt + 100;
4610 reslen += rescnt;
4611 if (reslen < 0) {
4612 Py_DECREF(result);
4613 Py_XDECREF(temp);
4614 return PyErr_NoMemory();
4615 }
4616 if (_PyString_Resize(&result, reslen)) {
4617 Py_XDECREF(temp);
4618 return NULL;
4619 }
4620 res = PyString_AS_STRING(result)
4621 + reslen - rescnt;
4622 }
4623 if (sign) {
4624 if (fill != ' ')
4625 *res++ = sign;
4626 rescnt--;
4627 if (width > len)
4628 width--;
4629 }
4630 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4631 assert(pbuf[0] == '0');
4632 assert(pbuf[1] == c);
4633 if (fill != ' ') {
4634 *res++ = *pbuf++;
4635 *res++ = *pbuf++;
4636 }
4637 rescnt -= 2;
4638 width -= 2;
4639 if (width < 0)
4640 width = 0;
4641 len -= 2;
4642 }
4643 if (width > len && !(flags & F_LJUST)) {
4644 do {
4645 --rescnt;
4646 *res++ = fill;
4647 } while (--width > len);
4648 }
4649 if (fill == ' ') {
4650 if (sign)
4651 *res++ = sign;
4652 if ((flags & F_ALT) &&
4653 (c == 'x' || c == 'X')) {
4654 assert(pbuf[0] == '0');
4655 assert(pbuf[1] == c);
4656 *res++ = *pbuf++;
4657 *res++ = *pbuf++;
4658 }
4659 }
4660 Py_MEMCPY(res, pbuf, len);
4661 res += len;
4662 rescnt -= len;
4663 while (--width >= len) {
4664 --rescnt;
4665 *res++ = ' ';
4666 }
4667 if (dict && (argidx < arglen) && c != '%') {
4668 PyErr_SetString(PyExc_TypeError,
4669 "not all arguments converted during string formatting");
4670 Py_XDECREF(temp);
4671 goto error;
4672 }
4673 Py_XDECREF(temp);
4674 } /* '%' */
4675 } /* until end */
4676 if (argidx < arglen && !dict) {
4677 PyErr_SetString(PyExc_TypeError,
4678 "not all arguments converted during string formatting");
4679 goto error;
4680 }
4681 if (args_owned) {
4682 Py_DECREF(args);
4683 }
4684 if (_PyString_Resize(&result, reslen - rescnt))
4685 return NULL;
4686 return result;
Christian Heimes44720832008-05-26 13:01:01 +00004687
4688#ifdef Py_USING_UNICODE
4689 unicode:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004690 if (args_owned) {
4691 Py_DECREF(args);
4692 args_owned = 0;
4693 }
4694 /* Fiddle args right (remove the first argidx arguments) */
4695 if (PyTuple_Check(orig_args) && argidx > 0) {
4696 PyObject *v;
4697 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4698 v = PyTuple_New(n);
4699 if (v == NULL)
4700 goto error;
4701 while (--n >= 0) {
4702 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4703 Py_INCREF(w);
4704 PyTuple_SET_ITEM(v, n, w);
4705 }
4706 args = v;
4707 } else {
4708 Py_INCREF(orig_args);
4709 args = orig_args;
4710 }
4711 args_owned = 1;
4712 /* Take what we have of the result and let the Unicode formatting
4713 function format the rest of the input. */
4714 rescnt = res - PyString_AS_STRING(result);
4715 if (_PyString_Resize(&result, rescnt))
4716 goto error;
4717 fmtcnt = PyString_GET_SIZE(format) - \
4718 (fmt - PyString_AS_STRING(format));
4719 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4720 if (format == NULL)
4721 goto error;
4722 v = PyUnicode_Format(format, args);
4723 Py_DECREF(format);
4724 if (v == NULL)
4725 goto error;
4726 /* Paste what we have (result) to what the Unicode formatting
4727 function returned (v) and return the result (or error) */
4728 w = PyUnicode_Concat(result, v);
4729 Py_DECREF(result);
4730 Py_DECREF(v);
4731 Py_DECREF(args);
4732 return w;
Christian Heimes44720832008-05-26 13:01:01 +00004733#endif /* Py_USING_UNICODE */
4734
4735 error:
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004736 Py_DECREF(result);
4737 if (args_owned) {
4738 Py_DECREF(args);
4739 }
4740 return NULL;
Christian Heimes44720832008-05-26 13:01:01 +00004741}
4742
4743void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004744PyString_InternInPlace(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004745{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004746 register PyStringObject *s = (PyStringObject *)(*p);
4747 PyObject *t;
4748 if (s == NULL || !PyString_Check(s))
4749 Py_FatalError("PyString_InternInPlace: strings only please!");
4750 /* If it's a string subclass, we don't really know what putting
4751 it in the interned dict might do. */
4752 if (!PyString_CheckExact(s))
4753 return;
4754 if (PyString_CHECK_INTERNED(s))
4755 return;
4756 if (interned == NULL) {
4757 interned = PyDict_New();
4758 if (interned == NULL) {
4759 PyErr_Clear(); /* Don't leave an exception */
4760 return;
4761 }
4762 }
4763 t = PyDict_GetItem(interned, (PyObject *)s);
4764 if (t) {
4765 Py_INCREF(t);
Serhiy Storchaka763a61c2016-04-10 18:05:12 +03004766 Py_SETREF(*p, t);
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004767 return;
4768 }
Christian Heimes44720832008-05-26 13:01:01 +00004769
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004770 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4771 PyErr_Clear();
4772 return;
4773 }
4774 /* The two references in interned are not counted by refcnt.
4775 The string deallocator will take care of this */
4776 Py_REFCNT(s) -= 2;
4777 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Christian Heimes44720832008-05-26 13:01:01 +00004778}
4779
4780void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004781PyString_InternImmortal(PyObject **p)
Christian Heimes44720832008-05-26 13:01:01 +00004782{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004783 PyString_InternInPlace(p);
4784 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4785 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4786 Py_INCREF(*p);
4787 }
Christian Heimes44720832008-05-26 13:01:01 +00004788}
4789
4790
4791PyObject *
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004792PyString_InternFromString(const char *cp)
Christian Heimes44720832008-05-26 13:01:01 +00004793{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004794 PyObject *s = PyString_FromString(cp);
4795 if (s == NULL)
4796 return NULL;
4797 PyString_InternInPlace(&s);
4798 return s;
Christian Heimes44720832008-05-26 13:01:01 +00004799}
4800
4801void
Gregory P. Smithdd96db62008-06-09 04:58:54 +00004802PyString_Fini(void)
Christian Heimes44720832008-05-26 13:01:01 +00004803{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004804 int i;
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02004805 for (i = 0; i < UCHAR_MAX + 1; i++)
4806 Py_CLEAR(characters[i]);
4807 Py_CLEAR(nullstring);
Christian Heimes44720832008-05-26 13:01:01 +00004808}
4809
4810void _Py_ReleaseInternedStrings(void)
4811{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004812 PyObject *keys;
4813 PyStringObject *s;
4814 Py_ssize_t i, n;
4815 Py_ssize_t immortal_size = 0, mortal_size = 0;
Christian Heimes44720832008-05-26 13:01:01 +00004816
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004817 if (interned == NULL || !PyDict_Check(interned))
4818 return;
4819 keys = PyDict_Keys(interned);
4820 if (keys == NULL || !PyList_Check(keys)) {
4821 PyErr_Clear();
4822 return;
4823 }
Christian Heimes44720832008-05-26 13:01:01 +00004824
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004825 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4826 detector, interned strings are not forcibly deallocated; rather, we
4827 give them their stolen references back, and then clear and DECREF
4828 the interned dict. */
Christian Heimes44720832008-05-26 13:01:01 +00004829
Antoine Pitrouc83ea132010-05-09 14:46:46 +00004830 n = PyList_GET_SIZE(keys);
4831 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4832 n);
4833 for (i = 0; i < n; i++) {
4834 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4835 switch (s->ob_sstate) {
4836 case SSTATE_NOT_INTERNED:
4837 /* XXX Shouldn't happen */
4838 break;
4839 case SSTATE_INTERNED_IMMORTAL:
4840 Py_REFCNT(s) += 1;
4841 immortal_size += Py_SIZE(s);
4842 break;
4843 case SSTATE_INTERNED_MORTAL:
4844 Py_REFCNT(s) += 2;
4845 mortal_size += Py_SIZE(s);
4846 break;
4847 default:
4848 Py_FatalError("Inconsistent interned string state.");
4849 }
4850 s->ob_sstate = SSTATE_NOT_INTERNED;
4851 }
4852 fprintf(stderr, "total size of all interned strings: "
4853 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4854 "mortal/immortal\n", mortal_size, immortal_size);
4855 Py_DECREF(keys);
4856 PyDict_Clear(interned);
Serhiy Storchakaa8d64ae2013-02-02 18:43:58 +02004857 Py_CLEAR(interned);
Christian Heimes1a6387e2008-03-26 12:49:49 +00004858}