blob: e3a0197a8b50b933b661d48b256e4357024ec0f5 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Guido van Rossumc0b618a1997-05-02 03:12:38 +000011static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000012static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013
Guido van Rossum45ec02a2002-08-19 21:43:18 +000014/* This dictionary holds all interned strings. Note that references to
15 strings in this dictionary are *not* counted in the string's ob_refcnt.
16 When the interned string reaches a refcnt of 0 the string deallocation
17 function will delete the reference from this dictionary.
18
Tim Petersae1d0c92006-03-17 03:29:34 +000019 Another way to look at this is that to say that the actual reference
Guido van Rossum45ec02a2002-08-19 21:43:18 +000020 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
21*/
22static PyObject *interned;
23
24
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000025/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For both PyString_FromString() and PyString_FromStringAndSize(), the
27 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000028 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000029
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000030 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000031 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000032
   For PyString_FromStringAndSize(), the parameter `str' is
34 either NULL or else points to a string containing at least `size' bytes.
35 For PyString_FromStringAndSize(), the string in the `str' parameter does
36 not have to be null-terminated. (Therefore it is safe to construct a
37 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
38 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
39 bytes (setting the last byte to the null terminating character) and you can
40 fill in the data yourself. If `str' is non-NULL then the resulting
41 PyString object must be treated as immutable and you must not fill in nor
42 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000043
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000044 The PyObject member `op->ob_size', which denotes the number of "extra
45 items" in a variable-size object, will contain the number of bytes
46 allocated for string data, not counting the null terminating character. It
   is therefore equal to the `size' parameter (for
48 PyString_FromStringAndSize()) or the length of the string in the `str'
49 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000050*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000051PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +000052PyString_FromStringAndSize(const char *str, Py_ssize_t size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000053{
Tim Peters9e897f42001-05-09 07:37:07 +000054 register PyStringObject *op;
Michael W. Hudsonfaa76482005-01-31 17:09:25 +000055 assert(size >= 0);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 if (size == 0 && (op = nullstring) != NULL) {
57#ifdef COUNT_ALLOCS
58 null_strings++;
59#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 Py_INCREF(op);
61 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000062 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 if (size == 1 && str != NULL &&
64 (op = characters[*str & UCHAR_MAX]) != NULL)
65 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000066#ifdef COUNT_ALLOCS
67 one_strings++;
68#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 Py_INCREF(op);
70 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000072
Guido van Rossume3a8e7e2002-08-19 19:26:42 +000073 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +000074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +000075 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000076 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000077 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +000079 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000080 if (str != NULL)
81 memcpy(op->ob_sval, str, size);
82 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000083 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000084 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000085 PyObject *t = (PyObject *)op;
86 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000087 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000088 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000090 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000091 PyObject *t = (PyObject *)op;
92 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000093 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000094 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000095 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000096 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000097 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000098}
99
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000100PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000101PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000102{
Tim Peters62de65b2001-12-06 20:29:32 +0000103 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +0000104 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +0000105
106 assert(str != NULL);
107 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000108 if (size > INT_MAX) {
109 PyErr_SetString(PyExc_OverflowError,
110 "string is too long for a Python string");
111 return NULL;
112 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000113 if (size == 0 && (op = nullstring) != NULL) {
114#ifdef COUNT_ALLOCS
115 null_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
121#ifdef COUNT_ALLOCS
122 one_strings++;
123#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000124 Py_INCREF(op);
125 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000126 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000127
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000128 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000130 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000131 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000132 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000134 op->ob_sstate = SSTATE_NOT_INTERNED;
Guido van Rossum169192e2001-12-10 15:45:54 +0000135 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000136 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000144 PyObject *t = (PyObject *)op;
145 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000146 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000147 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000148 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000149 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000150 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000151}
152
Barry Warsawdadace02001-08-24 18:32:06 +0000153PyObject *
154PyString_FromFormatV(const char *format, va_list vargs)
155{
Tim Petersc15c4f12001-10-02 21:32:07 +0000156 va_list count;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000157 Py_ssize_t n = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000158 const char* f;
159 char *s;
160 PyObject* string;
161
Tim Petersc15c4f12001-10-02 21:32:07 +0000162#ifdef VA_LIST_IS_ARRAY
163 memcpy(count, vargs, sizeof(va_list));
164#else
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000165#ifdef __va_copy
166 __va_copy(count, vargs);
167#else
Tim Petersc15c4f12001-10-02 21:32:07 +0000168 count = vargs;
169#endif
Martin v. Löwis75d2d942002-07-28 10:23:27 +0000170#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000171 /* step 1: figure out how large a buffer we need */
172 for (f = format; *f; f++) {
173 if (*f == '%') {
174 const char* p = f;
175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
176 ;
177
178 /* skip the 'l' in %ld, since it doesn't change the
179 width. although only %d is supported (see
180 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000181 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000182 if (*f == 'l' && *(f+1) == 'd')
183 ++f;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000184 /* likewise for %zd */
185 if (*f == 'z' && *(f+1) == 'd')
Tim Petersae1d0c92006-03-17 03:29:34 +0000186 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000187
Barry Warsawdadace02001-08-24 18:32:06 +0000188 switch (*f) {
189 case 'c':
190 (void)va_arg(count, int);
191 /* fall through... */
192 case '%':
193 n++;
194 break;
195 case 'd': case 'i': case 'x':
196 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000197 /* 20 bytes is enough to hold a 64-bit
198 integer. Decimal takes the most space.
199 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000200 n += 20;
201 break;
202 case 's':
203 s = va_arg(count, char*);
204 n += strlen(s);
205 break;
206 case 'p':
207 (void) va_arg(count, int);
208 /* maximum 64-bit pointer representation:
209 * 0xffffffffffffffff
210 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000211 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000212 */
213 n += 19;
214 break;
215 default:
216 /* if we stumble upon an unknown
217 formatting code, copy the rest of
218 the format string to the output
219 string. (we cannot just skip the
220 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000221 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000222 n += strlen(p);
223 goto expand;
224 }
225 } else
226 n++;
227 }
228 expand:
229 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000230 /* Since we've analyzed how much space we need for the worst case,
231 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000232 string = PyString_FromStringAndSize(NULL, n);
233 if (!string)
234 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000235
Barry Warsawdadace02001-08-24 18:32:06 +0000236 s = PyString_AsString(string);
237
238 for (f = format; *f; f++) {
239 if (*f == '%') {
240 const char* p = f++;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000241 Py_ssize_t i;
242 int longflag = 0;
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000243 int size_tflag = 0;
Barry Warsawdadace02001-08-24 18:32:06 +0000244 /* parse the width.precision part (we're only
245 interested in the precision value, if any) */
246 n = 0;
247 while (isdigit(Py_CHARMASK(*f)))
248 n = (n*10) + *f++ - '0';
249 if (*f == '.') {
250 f++;
251 n = 0;
252 while (isdigit(Py_CHARMASK(*f)))
253 n = (n*10) + *f++ - '0';
254 }
255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
256 f++;
257 /* handle the long flag, but only for %ld. others
258 can be added when necessary. */
259 if (*f == 'l' && *(f+1) == 'd') {
260 longflag = 1;
261 ++f;
262 }
Martin v. Löwis2c95cc62006-02-16 06:54:25 +0000263 /* handle the size_t flag. */
264 if (*f == 'z' && *(f+1) == 'd') {
265 size_tflag = 1;
266 ++f;
267 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000268
Barry Warsawdadace02001-08-24 18:32:06 +0000269 switch (*f) {
270 case 'c':
271 *s++ = va_arg(vargs, int);
272 break;
273 case 'd':
274 if (longflag)
275 sprintf(s, "%ld", va_arg(vargs, long));
Tim Petersae1d0c92006-03-17 03:29:34 +0000276 else if (size_tflag)
277 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
278 va_arg(vargs, size_t));
Barry Warsawdadace02001-08-24 18:32:06 +0000279 else
280 sprintf(s, "%d", va_arg(vargs, int));
281 s += strlen(s);
282 break;
283 case 'i':
284 sprintf(s, "%i", va_arg(vargs, int));
285 s += strlen(s);
286 break;
287 case 'x':
288 sprintf(s, "%x", va_arg(vargs, int));
289 s += strlen(s);
290 break;
291 case 's':
292 p = va_arg(vargs, char*);
293 i = strlen(p);
294 if (n > 0 && i > n)
295 i = n;
296 memcpy(s, p, i);
297 s += i;
298 break;
299 case 'p':
300 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000301 /* %p is ill-defined: ensure leading 0x. */
302 if (s[1] == 'X')
303 s[1] = 'x';
304 else if (s[1] != 'x') {
305 memmove(s+2, s, strlen(s)+1);
306 s[0] = '0';
307 s[1] = 'x';
308 }
Barry Warsawdadace02001-08-24 18:32:06 +0000309 s += strlen(s);
310 break;
311 case '%':
312 *s++ = '%';
313 break;
314 default:
315 strcpy(s, p);
316 s += strlen(s);
317 goto end;
318 }
319 } else
320 *s++ = *f;
321 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000322
Barry Warsawdadace02001-08-24 18:32:06 +0000323 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000324 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000325 return string;
326}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000327
Barry Warsawdadace02001-08-24 18:32:06 +0000328PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000329PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000330{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000331 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000332 va_list vargs;
333
334#ifdef HAVE_STDARG_PROTOTYPES
335 va_start(vargs, format);
336#else
337 va_start(vargs);
338#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000339 ret = PyString_FromFormatV(format, vargs);
340 va_end(vargs);
341 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000342}
343
344
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000345PyObject *PyString_Decode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000346 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000347 const char *encoding,
348 const char *errors)
349{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000350 PyObject *v, *str;
351
352 str = PyString_FromStringAndSize(s, size);
353 if (str == NULL)
354 return NULL;
355 v = PyString_AsDecodedString(str, encoding, errors);
356 Py_DECREF(str);
357 return v;
358}
359
360PyObject *PyString_AsDecodedObject(PyObject *str,
361 const char *encoding,
362 const char *errors)
363{
364 PyObject *v;
365
366 if (!PyString_Check(str)) {
367 PyErr_BadArgument();
368 goto onError;
369 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000370
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000371 if (encoding == NULL) {
372#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000373 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000374#else
375 PyErr_SetString(PyExc_ValueError, "no encoding specified");
376 goto onError;
377#endif
378 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000379
380 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000381 v = PyCodec_Decode(str, encoding, errors);
382 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000383 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000384
385 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000386
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000387 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 return NULL;
389}
390
391PyObject *PyString_AsDecodedString(PyObject *str,
392 const char *encoding,
393 const char *errors)
394{
395 PyObject *v;
396
397 v = PyString_AsDecodedObject(str, encoding, errors);
398 if (v == NULL)
399 goto onError;
400
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000401#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000402 /* Convert Unicode to a string using the default encoding */
403 if (PyUnicode_Check(v)) {
404 PyObject *temp = v;
405 v = PyUnicode_AsEncodedString(v, NULL, NULL);
406 Py_DECREF(temp);
407 if (v == NULL)
408 goto onError;
409 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000410#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411 if (!PyString_Check(v)) {
412 PyErr_Format(PyExc_TypeError,
413 "decoder did not return a string object (type=%.400s)",
414 v->ob_type->tp_name);
415 Py_DECREF(v);
416 goto onError;
417 }
418
419 return v;
420
421 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000422 return NULL;
423}
424
425PyObject *PyString_Encode(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000426 Py_ssize_t size,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000427 const char *encoding,
428 const char *errors)
429{
430 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000431
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000432 str = PyString_FromStringAndSize(s, size);
433 if (str == NULL)
434 return NULL;
435 v = PyString_AsEncodedString(str, encoding, errors);
436 Py_DECREF(str);
437 return v;
438}
439
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000440PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000441 const char *encoding,
442 const char *errors)
443{
444 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000445
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000446 if (!PyString_Check(str)) {
447 PyErr_BadArgument();
448 goto onError;
449 }
450
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000451 if (encoding == NULL) {
452#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000454#else
455 PyErr_SetString(PyExc_ValueError, "no encoding specified");
456 goto onError;
457#endif
458 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459
460 /* Encode via the codec registry */
461 v = PyCodec_Encode(str, encoding, errors);
462 if (v == NULL)
463 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000464
465 return v;
466
467 onError:
468 return NULL;
469}
470
471PyObject *PyString_AsEncodedString(PyObject *str,
472 const char *encoding,
473 const char *errors)
474{
475 PyObject *v;
476
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000477 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000478 if (v == NULL)
479 goto onError;
480
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000481#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000482 /* Convert Unicode to a string using the default encoding */
483 if (PyUnicode_Check(v)) {
484 PyObject *temp = v;
485 v = PyUnicode_AsEncodedString(v, NULL, NULL);
486 Py_DECREF(temp);
487 if (v == NULL)
488 goto onError;
489 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000490#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000491 if (!PyString_Check(v)) {
492 PyErr_Format(PyExc_TypeError,
493 "encoder did not return a string object (type=%.400s)",
494 v->ob_type->tp_name);
495 Py_DECREF(v);
496 goto onError;
497 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000498
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000499 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000500
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000501 onError:
502 return NULL;
503}
504
Guido van Rossum234f9421993-06-17 12:35:49 +0000505static void
Fred Drakeba096332000-07-09 07:04:36 +0000506string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000507{
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000508 switch (PyString_CHECK_INTERNED(op)) {
509 case SSTATE_NOT_INTERNED:
510 break;
511
512 case SSTATE_INTERNED_MORTAL:
513 /* revive dead object temporarily for DelItem */
514 op->ob_refcnt = 3;
515 if (PyDict_DelItem(interned, op) != 0)
516 Py_FatalError(
517 "deletion of interned string failed");
518 break;
519
520 case SSTATE_INTERNED_IMMORTAL:
521 Py_FatalError("Immortal interned string died.");
522
523 default:
524 Py_FatalError("Inconsistent interned string state.");
525 }
Guido van Rossum9475a232001-10-05 20:51:39 +0000526 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000527}
528
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000529/* Unescape a backslash-escaped string. If unicode is non-zero,
530 the string is a u-literal. If recode_encoding is non-zero,
531 the string is UTF-8 encoded and should be re-encoded in the
532 specified encoding. */
533
534PyObject *PyString_DecodeEscape(const char *s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000535 Py_ssize_t len,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000536 const char *errors,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000537 Py_ssize_t unicode,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000538 const char *recode_encoding)
539{
540 int c;
541 char *p, *buf;
542 const char *end;
543 PyObject *v;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000544 Py_ssize_t newlen = recode_encoding ? 4*len:len;
Walter Dörwald8709a422002-09-03 13:53:40 +0000545 v = PyString_FromStringAndSize((char *)NULL, newlen);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000546 if (v == NULL)
547 return NULL;
548 p = buf = PyString_AsString(v);
549 end = s + len;
550 while (s < end) {
551 if (*s != '\\') {
Martin v. Löwis24128532002-09-09 06:17:05 +0000552 non_esc:
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000553#ifdef Py_USING_UNICODE
554 if (recode_encoding && (*s & 0x80)) {
555 PyObject *u, *w;
556 char *r;
557 const char* t;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000558 Py_ssize_t rn;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000559 t = s;
560 /* Decode non-ASCII bytes as UTF-8. */
561 while (t < end && (*t & 0x80)) t++;
562 u = PyUnicode_DecodeUTF8(s, t - s, errors);
563 if(!u) goto failed;
564
565 /* Recode them in target encoding. */
566 w = PyUnicode_AsEncodedString(
567 u, recode_encoding, errors);
568 Py_DECREF(u);
569 if (!w) goto failed;
570
571 /* Append bytes to output buffer. */
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +0000572 assert(PyString_Check(w));
573 r = PyString_AS_STRING(w);
574 rn = PyString_GET_SIZE(w);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000575 memcpy(p, r, rn);
576 p += rn;
577 Py_DECREF(w);
578 s = t;
579 } else {
580 *p++ = *s++;
581 }
582#else
583 *p++ = *s++;
584#endif
585 continue;
586 }
587 s++;
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000588 if (s==end) {
589 PyErr_SetString(PyExc_ValueError,
590 "Trailing \\ in string");
591 goto failed;
592 }
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000593 switch (*s++) {
594 /* XXX This assumes ASCII! */
595 case '\n': break;
596 case '\\': *p++ = '\\'; break;
597 case '\'': *p++ = '\''; break;
598 case '\"': *p++ = '\"'; break;
599 case 'b': *p++ = '\b'; break;
600 case 'f': *p++ = '\014'; break; /* FF */
601 case 't': *p++ = '\t'; break;
602 case 'n': *p++ = '\n'; break;
603 case 'r': *p++ = '\r'; break;
604 case 'v': *p++ = '\013'; break; /* VT */
605 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
606 case '0': case '1': case '2': case '3':
607 case '4': case '5': case '6': case '7':
608 c = s[-1] - '0';
609 if ('0' <= *s && *s <= '7') {
610 c = (c<<3) + *s++ - '0';
611 if ('0' <= *s && *s <= '7')
612 c = (c<<3) + *s++ - '0';
613 }
614 *p++ = c;
615 break;
616 case 'x':
Tim Petersae1d0c92006-03-17 03:29:34 +0000617 if (isxdigit(Py_CHARMASK(s[0]))
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000618 && isxdigit(Py_CHARMASK(s[1]))) {
619 unsigned int x = 0;
620 c = Py_CHARMASK(*s);
621 s++;
622 if (isdigit(c))
623 x = c - '0';
624 else if (islower(c))
625 x = 10 + c - 'a';
626 else
627 x = 10 + c - 'A';
628 x = x << 4;
629 c = Py_CHARMASK(*s);
630 s++;
631 if (isdigit(c))
632 x += c - '0';
633 else if (islower(c))
634 x += 10 + c - 'a';
635 else
636 x += 10 + c - 'A';
637 *p++ = x;
638 break;
639 }
640 if (!errors || strcmp(errors, "strict") == 0) {
Tim Petersae1d0c92006-03-17 03:29:34 +0000641 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000642 "invalid \\x escape");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000643 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000644 }
645 if (strcmp(errors, "replace") == 0) {
646 *p++ = '?';
647 } else if (strcmp(errors, "ignore") == 0)
648 /* do nothing */;
649 else {
650 PyErr_Format(PyExc_ValueError,
651 "decoding error; "
652 "unknown error handling code: %.400s",
653 errors);
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000654 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000655 }
656#ifndef Py_USING_UNICODE
657 case 'u':
658 case 'U':
659 case 'N':
660 if (unicode) {
Neal Norwitzb898d9f2002-08-16 23:20:39 +0000661 PyErr_SetString(PyExc_ValueError,
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000662 "Unicode escapes not legal "
663 "when Unicode disabled");
Martin v. Löwiseb3f00a2002-08-14 08:22:50 +0000664 goto failed;
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000665 }
666#endif
667 default:
668 *p++ = '\\';
Martin v. Löwis24128532002-09-09 06:17:05 +0000669 s--;
670 goto non_esc; /* an arbitry number of unescaped
671 UTF-8 bytes may follow. */
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000672 }
673 }
Walter Dörwald8709a422002-09-03 13:53:40 +0000674 if (p-buf < newlen)
Martin v. Löwis18e16552006-02-15 17:27:45 +0000675 _PyString_Resize(&v, p - buf);
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000676 return v;
677 failed:
678 Py_DECREF(v);
679 return NULL;
680}
681
Martin v. Löwis18e16552006-02-15 17:27:45 +0000682static Py_ssize_t
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000683string_getsize(register PyObject *op)
684{
685 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000686 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000687 if (PyString_AsStringAndSize(op, &s, &len))
688 return -1;
689 return len;
690}
691
692static /*const*/ char *
693string_getbuffer(register PyObject *op)
694{
695 char *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000696 Py_ssize_t len;
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000697 if (PyString_AsStringAndSize(op, &s, &len))
698 return NULL;
699 return s;
700}
701
Martin v. Löwis18e16552006-02-15 17:27:45 +0000702Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000703PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000704{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000705 if (!PyString_Check(op))
706 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000707 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000708}
709
710/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000711PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000712{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000713 if (!PyString_Check(op))
714 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000715 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716}
717
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000718int
719PyString_AsStringAndSize(register PyObject *obj,
720 register char **s,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000721 register Py_ssize_t *len)
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000722{
723 if (s == NULL) {
724 PyErr_BadInternalCall();
725 return -1;
726 }
727
728 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000729#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000730 if (PyUnicode_Check(obj)) {
731 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
732 if (obj == NULL)
733 return -1;
734 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000735 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000736#endif
737 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000738 PyErr_Format(PyExc_TypeError,
739 "expected string or Unicode object, "
740 "%.200s found", obj->ob_type->tp_name);
741 return -1;
742 }
743 }
744
745 *s = PyString_AS_STRING(obj);
746 if (len != NULL)
747 *len = PyString_GET_SIZE(obj);
Martin v. Löwis18e16552006-02-15 17:27:45 +0000748 else if (strlen(*s) != PyString_GET_SIZE(obj)) {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000749 PyErr_SetString(PyExc_TypeError,
750 "expected string without null bytes");
751 return -1;
752 }
753 return 0;
754}
755
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756/* Methods */
757
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000758static int
Fred Drakeba096332000-07-09 07:04:36 +0000759string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000760{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000761 Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000762 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000763 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000764
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000765 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000766 if (! PyString_CheckExact(op)) {
767 int ret;
768 /* A str subclass may have its own __str__ method. */
769 op = (PyStringObject *) PyObject_Str((PyObject *)op);
770 if (op == NULL)
771 return -1;
772 ret = string_print(op, fp, flags);
773 Py_DECREF(op);
774 return ret;
775 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000776 if (flags & Py_PRINT_RAW) {
Martin v. Löwis79acb9e2002-12-06 12:48:53 +0000777#ifdef __VMS
778 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
779#else
780 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
781#endif
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000782 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000783 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000784
Thomas Wouters7e474022000-07-16 12:04:32 +0000785 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000786 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000787 if (memchr(op->ob_sval, '\'', op->ob_size) &&
788 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000789 quote = '"';
790
791 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000792 for (i = 0; i < op->ob_size; i++) {
793 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000794 if (c == quote || c == '\\')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000795 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000796 else if (c == '\t')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000797 fprintf(fp, "\\t");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000798 else if (c == '\n')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000799 fprintf(fp, "\\n");
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000800 else if (c == '\r')
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000801 fprintf(fp, "\\r");
802 else if (c < ' ' || c >= 0x7f)
803 fprintf(fp, "\\x%02x", c & 0xff);
Martin v. Löwisfed24052002-10-07 13:55:50 +0000804 else
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000805 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000806 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000807 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000808 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000809}
810
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000811PyObject *
812PyString_Repr(PyObject *obj, int smartquotes)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000813{
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000814 register PyStringObject* op = (PyStringObject*) obj;
Tim Peterse7c05322004-06-27 17:24:49 +0000815 size_t newsize = 2 + 4 * op->ob_size;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000816 PyObject *v;
817 if (newsize > INT_MAX) {
818 PyErr_SetString(PyExc_OverflowError,
819 "string is too large to make repr");
820 }
821 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000822 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000823 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000824 }
825 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000826 register Py_ssize_t i;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000827 register char c;
828 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000829 int quote;
830
Thomas Wouters7e474022000-07-16 12:04:32 +0000831 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000832 quote = '\'';
Tim Petersae1d0c92006-03-17 03:29:34 +0000833 if (smartquotes &&
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000834 memchr(op->ob_sval, '\'', op->ob_size) &&
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000835 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000836 quote = '"';
837
Tim Peters9161c8b2001-12-03 01:55:38 +0000838 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000839 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000840 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000841 /* There's at least enough room for a hex escape
842 and a closing quote. */
843 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000844 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000845 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000846 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000847 else if (c == '\t')
848 *p++ = '\\', *p++ = 't';
849 else if (c == '\n')
850 *p++ = '\\', *p++ = 'n';
851 else if (c == '\r')
852 *p++ = '\\', *p++ = 'r';
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000853 else if (c < ' ' || c >= 0x7f) {
854 /* For performance, we don't want to call
855 PyOS_snprintf here (extra layers of
856 function call). */
857 sprintf(p, "\\x%02x", c & 0xff);
858 p += 4;
Martin v. Löwisfed24052002-10-07 13:55:50 +0000859 }
Martin v. Löwisa5f09072002-10-11 05:37:59 +0000860 else
861 *p++ = c;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000862 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000863 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000864 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000865 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000866 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000867 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000868 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000869 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000870}
871
Guido van Rossum189f1df2001-05-01 16:51:53 +0000872static PyObject *
Martin v. Löwis8a8da792002-08-14 07:46:28 +0000873string_repr(PyObject *op)
874{
875 return PyString_Repr(op, 1);
876}
877
878static PyObject *
Guido van Rossum189f1df2001-05-01 16:51:53 +0000879string_str(PyObject *s)
880{
Tim Petersc9933152001-10-16 20:18:24 +0000881 assert(PyString_Check(s));
882 if (PyString_CheckExact(s)) {
883 Py_INCREF(s);
884 return s;
885 }
886 else {
887 /* Subtype -- return genuine string with the same value. */
888 PyStringObject *t = (PyStringObject *) s;
889 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
890 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000891}
892
Martin v. Löwis18e16552006-02-15 17:27:45 +0000893static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +0000894string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000895{
896 return a->ob_size;
897}
898
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000899static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000900string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000901{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000902 register size_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000903 register PyStringObject *op;
904 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000905#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000906 if (PyUnicode_Check(bb))
907 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000908#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000909 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000910 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000911 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000912 return NULL;
913 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000914#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000915 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000916 if ((a->ob_size == 0 || b->ob_size == 0) &&
917 PyString_CheckExact(a) && PyString_CheckExact(b)) {
918 if (a->ob_size == 0) {
919 Py_INCREF(bb);
920 return bb;
921 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000922 Py_INCREF(a);
923 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000924 }
925 size = a->ob_size + b->ob_size;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000926 /* XXX check overflow */
Guido van Rossume3a8e7e2002-08-19 19:26:42 +0000927 /* Inline PyObject_NewVar */
Tim Peterse7c05322004-06-27 17:24:49 +0000928 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000929 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000931 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000932 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000933 op->ob_sstate = SSTATE_NOT_INTERNED;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000934 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
935 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000936 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000937 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000938#undef b
939}
940
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000941static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +0000942string_repeat(register PyStringObject *a, register Py_ssize_t n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000943{
Martin v. Löwis18e16552006-02-15 17:27:45 +0000944 register Py_ssize_t i;
945 register Py_ssize_t j;
946 register Py_ssize_t size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000947 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000948 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000949 if (n < 0)
950 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000951 /* watch out for overflows: the size can overflow int,
952 * and the # of bytes needed can overflow size_t
953 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000954 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000955 if (n && size / n != a->ob_size) {
956 PyErr_SetString(PyExc_OverflowError,
957 "repeated string is too long");
958 return NULL;
959 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000960 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000961 Py_INCREF(a);
962 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000963 }
Tim Peterse7c05322004-06-27 17:24:49 +0000964 nbytes = (size_t)size;
965 if (nbytes + sizeof(PyStringObject) <= nbytes) {
Tim Peters8f422462000-09-09 06:13:41 +0000966 PyErr_SetString(PyExc_OverflowError,
967 "repeated string is too long");
968 return NULL;
969 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000970 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000971 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000972 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000973 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000974 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000975 op->ob_shash = -1;
Guido van Rossum45ec02a2002-08-19 21:43:18 +0000976 op->ob_sstate = SSTATE_NOT_INTERNED;
Raymond Hettinger0a2f8492003-01-06 22:42:41 +0000977 op->ob_sval[size] = '\0';
978 if (a->ob_size == 1 && n > 0) {
979 memset(op->ob_sval, a->ob_sval[0] , n);
980 return (PyObject *) op;
981 }
Raymond Hettinger698258a2003-01-06 10:33:56 +0000982 i = 0;
983 if (i < size) {
Martin v. Löwis18e16552006-02-15 17:27:45 +0000984 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
985 i = a->ob_size;
Raymond Hettinger698258a2003-01-06 10:33:56 +0000986 }
987 while (i < size) {
988 j = (i <= size-i) ? i : size-i;
989 memcpy(op->ob_sval+i, op->ob_sval, j);
990 i += j;
991 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000992 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000993}
994
995/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
996
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000997static PyObject *
Tim Petersae1d0c92006-03-17 03:29:34 +0000998string_slice(register PyStringObject *a, register Py_ssize_t i,
Martin v. Löwis18e16552006-02-15 17:27:45 +0000999 register Py_ssize_t j)
Fred Drakeba096332000-07-09 07:04:36 +00001000 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001001{
1002 if (i < 0)
1003 i = 0;
1004 if (j < 0)
1005 j = 0; /* Avoid signed/unsigned bug in next line */
1006 if (j > a->ob_size)
1007 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +00001008 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1009 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001010 Py_INCREF(a);
1011 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001012 }
1013 if (j < i)
1014 j = i;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001015 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001016}
1017
Guido van Rossum9284a572000-03-07 15:53:43 +00001018static int
Fred Drakeba096332000-07-09 07:04:36 +00001019string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +00001020{
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001021 char *s = PyString_AS_STRING(a);
1022 const char *sub = PyString_AS_STRING(el);
1023 char *last;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001024 Py_ssize_t len_sub = PyString_GET_SIZE(el);
Martin v. Löwiseb079f12006-02-16 14:32:27 +00001025 Py_ssize_t shortsub;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001026 char firstchar, lastchar;
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001027
1028 if (!PyString_CheckExact(el)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001029#ifdef Py_USING_UNICODE
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001030 if (PyUnicode_Check(el))
1031 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001032#endif
Guido van Rossumbf935fd2002-08-24 06:57:49 +00001033 if (!PyString_Check(el)) {
1034 PyErr_SetString(PyExc_TypeError,
1035 "'in <string>' requires string as left operand");
1036 return -1;
1037 }
Guido van Rossum9284a572000-03-07 15:53:43 +00001038 }
Barry Warsaw817918c2002-08-06 16:58:21 +00001039
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001040 if (len_sub == 0)
1041 return 1;
Tim Petersae1d0c92006-03-17 03:29:34 +00001042 /* last points to one char beyond the start of the rightmost
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001043 substring. When s<last, there is still room for a possible match
1044 and s[0] through s[len_sub-1] will be in bounds.
1045 shortsub is len_sub minus the last character which is checked
1046 separately just before the memcmp(). That check helps prevent
1047 false starts and saves the setup time for memcmp().
1048 */
1049 firstchar = sub[0];
1050 shortsub = len_sub - 1;
1051 lastchar = sub[shortsub];
1052 last = s + PyString_GET_SIZE(a) - len_sub + 1;
1053 while (s < last) {
1054 s = memchr(s, firstchar, last-s);
1055 if (s == NULL)
1056 return 0;
1057 assert(s < last);
1058 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
Guido van Rossum9284a572000-03-07 15:53:43 +00001059 return 1;
Raymond Hettinger7cbf1bc2005-02-20 04:07:08 +00001060 s++;
Guido van Rossum9284a572000-03-07 15:53:43 +00001061 }
1062 return 0;
1063}
1064
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001065static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001066string_item(PyStringObject *a, register Py_ssize_t i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001067{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001068 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +00001069 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001070 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001071 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001072 return NULL;
1073 }
Tim Peters5b4d4772001-05-08 22:33:50 +00001074 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001075 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +00001076 if (v == NULL)
1077 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001078 else {
1079#ifdef COUNT_ALLOCS
1080 one_strings++;
1081#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +00001082 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +00001083 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +00001084 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001085}
1086
Martin v. Löwiscd353062001-05-24 16:56:35 +00001087static PyObject*
1088string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001089{
Martin v. Löwiscd353062001-05-24 16:56:35 +00001090 int c;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001091 Py_ssize_t len_a, len_b;
1092 Py_ssize_t min_len;
Martin v. Löwiscd353062001-05-24 16:56:35 +00001093 PyObject *result;
1094
Guido van Rossum2ed6bf82001-09-27 20:30:07 +00001095 /* Make sure both arguments are strings. */
1096 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +00001097 result = Py_NotImplemented;
1098 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001099 }
Martin v. Löwiscd353062001-05-24 16:56:35 +00001100 if (a == b) {
1101 switch (op) {
1102 case Py_EQ:case Py_LE:case Py_GE:
1103 result = Py_True;
1104 goto out;
1105 case Py_NE:case Py_LT:case Py_GT:
1106 result = Py_False;
1107 goto out;
1108 }
1109 }
1110 if (op == Py_EQ) {
1111 /* Supporting Py_NE here as well does not save
1112 much time, since Py_NE is rarely used. */
1113 if (a->ob_size == b->ob_size
1114 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00001115 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +00001116 a->ob_size) == 0)) {
1117 result = Py_True;
1118 } else {
1119 result = Py_False;
1120 }
1121 goto out;
1122 }
1123 len_a = a->ob_size; len_b = b->ob_size;
1124 min_len = (len_a < len_b) ? len_a : len_b;
1125 if (min_len > 0) {
1126 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1127 if (c==0)
1128 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1129 }else
1130 c = 0;
1131 if (c == 0)
1132 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1133 switch (op) {
1134 case Py_LT: c = c < 0; break;
1135 case Py_LE: c = c <= 0; break;
1136 case Py_EQ: assert(0); break; /* unreachable */
1137 case Py_NE: c = c != 0; break;
1138 case Py_GT: c = c > 0; break;
1139 case Py_GE: c = c >= 0; break;
1140 default:
1141 result = Py_NotImplemented;
1142 goto out;
1143 }
1144 result = c ? Py_True : Py_False;
1145 out:
1146 Py_INCREF(result);
1147 return result;
1148}
1149
1150int
1151_PyString_Eq(PyObject *o1, PyObject *o2)
1152{
1153 PyStringObject *a, *b;
1154 a = (PyStringObject*)o1;
1155 b = (PyStringObject*)o2;
1156 return a->ob_size == b->ob_size
1157 && *a->ob_sval == *b->ob_sval
1158 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001159}
1160
Guido van Rossum9bfef441993-03-29 10:43:31 +00001161static long
Fred Drakeba096332000-07-09 07:04:36 +00001162string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +00001163{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001164 register Py_ssize_t len;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001165 register unsigned char *p;
1166 register long x;
1167
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001168 if (a->ob_shash != -1)
1169 return a->ob_shash;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001170 len = a->ob_size;
1171 p = (unsigned char *) a->ob_sval;
1172 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001173 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +00001174 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001175 x ^= a->ob_size;
1176 if (x == -1)
1177 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00001178 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +00001179 return x;
1180}
1181
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001182#define HASINDEX(o) PyType_HasFeature((o)->ob_type, Py_TPFLAGS_HAVE_INDEX)
1183
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001184static PyObject*
1185string_subscript(PyStringObject* self, PyObject* item)
1186{
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001187 PyNumberMethods *nb = item->ob_type->tp_as_number;
1188 if (nb != NULL && HASINDEX(item) && nb->nb_index != NULL) {
1189 Py_ssize_t i = nb->nb_index(item);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001190 if (i == -1 && PyErr_Occurred())
1191 return NULL;
1192 if (i < 0)
1193 i += PyString_GET_SIZE(self);
Guido van Rossum38fff8c2006-03-07 18:50:55 +00001194 return string_item(self, i);
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001195 }
1196 else if (PySlice_Check(item)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001197 Py_ssize_t start, stop, step, slicelength, cur, i;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001198 char* source_buf;
1199 char* result_buf;
1200 PyObject* result;
1201
Tim Petersae1d0c92006-03-17 03:29:34 +00001202 if (PySlice_GetIndicesEx((PySliceObject*)item,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001203 PyString_GET_SIZE(self),
1204 &start, &stop, &step, &slicelength) < 0) {
1205 return NULL;
1206 }
1207
1208 if (slicelength <= 0) {
1209 return PyString_FromStringAndSize("", 0);
1210 }
1211 else {
1212 source_buf = PyString_AsString((PyObject*)self);
1213 result_buf = PyMem_Malloc(slicelength);
Neal Norwitz95c1e502005-10-20 04:15:52 +00001214 if (result_buf == NULL)
1215 return PyErr_NoMemory();
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001216
Tim Petersae1d0c92006-03-17 03:29:34 +00001217 for (cur = start, i = 0; i < slicelength;
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001218 cur += step, i++) {
1219 result_buf[i] = source_buf[cur];
1220 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001221
1222 result = PyString_FromStringAndSize(result_buf,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001223 slicelength);
1224 PyMem_Free(result_buf);
1225 return result;
1226 }
Tim Petersae1d0c92006-03-17 03:29:34 +00001227 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001228 else {
Tim Petersae1d0c92006-03-17 03:29:34 +00001229 PyErr_SetString(PyExc_TypeError,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001230 "string indices must be integers");
1231 return NULL;
1232 }
1233}
1234
Martin v. Löwis18e16552006-02-15 17:27:45 +00001235static Py_ssize_t
1236string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001237{
1238 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001239 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001240 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001241 return -1;
1242 }
1243 *ptr = (void *)self->ob_sval;
1244 return self->ob_size;
1245}
1246
Martin v. Löwis18e16552006-02-15 17:27:45 +00001247static Py_ssize_t
1248string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001249{
Guido van Rossum045e6881997-09-08 18:30:11 +00001250 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001251 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001252 return -1;
1253}
1254
Martin v. Löwis18e16552006-02-15 17:27:45 +00001255static Py_ssize_t
1256string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001257{
1258 if ( lenp )
1259 *lenp = self->ob_size;
1260 return 1;
1261}
1262
Martin v. Löwis18e16552006-02-15 17:27:45 +00001263static Py_ssize_t
1264string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001265{
1266 if ( index != 0 ) {
1267 PyErr_SetString(PyExc_SystemError,
1268 "accessing non-existent string segment");
1269 return -1;
1270 }
1271 *ptr = self->ob_sval;
1272 return self->ob_size;
1273}
1274
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001275static PySequenceMethods string_as_sequence = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001276 (lenfunc)string_length, /*sq_length*/
Guido van Rossum013142a1994-08-30 08:19:36 +00001277 (binaryfunc)string_concat, /*sq_concat*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00001278 (ssizeargfunc)string_repeat, /*sq_repeat*/
1279 (ssizeargfunc)string_item, /*sq_item*/
1280 (ssizessizeargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001281 0, /*sq_ass_item*/
1282 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001283 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001284};
1285
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001286static PyMappingMethods string_as_mapping = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001287 (lenfunc)string_length,
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001288 (binaryfunc)string_subscript,
1289 0,
1290};
1291
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001292static PyBufferProcs string_as_buffer = {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001293 (readbufferproc)string_buffer_getreadbuf,
1294 (writebufferproc)string_buffer_getwritebuf,
1295 (segcountproc)string_buffer_getsegcount,
1296 (charbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001297};
1298
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299
1300
/* Strip modes.  The values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Format strings for each strip mode, indexed by the constants above. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Bare method name for strip mode `i': the format string with its
   three-character "|O:" prefix skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001309
/* Append the substring data[left:right] to the list being built.
 *
 * Relies on the expanding function to provide a `PyObject *str'
 * scratch variable, a `PyObject *list' holding the result, and an
 * `onError' label that is jumped to when creating or appending the
 * substring fails.
 *
 * Wrapped in do { } while (0) so that the expansion is a single
 * statement: use it as `SPLIT_APPEND(...);' and it stays correct under
 * an unbraced if/else.
 */
#define SPLIT_APPEND(data, left, right)					\
	do {								\
		str = PyString_FromStringAndSize((data) + (left),	\
						 (right) - (left));	\
		if (str == NULL)					\
			goto onError;					\
		if (PyList_Append(list, str)) {				\
			Py_DECREF(str);					\
			goto onError;					\
		}							\
		Py_DECREF(str);						\
	} while (0)
1321
/* Insert the substring data[left:right] at the front of the list being
 * built.
 *
 * Relies on the expanding function to provide a `PyObject *str'
 * scratch variable, a `PyObject *list' holding the result, and an
 * `onError' label that is jumped to when creating or inserting the
 * substring fails.
 *
 * Wrapped in do { } while (0) so that the expansion is a single
 * statement: use it as `SPLIT_INSERT(...);' and it stays correct under
 * an unbraced if/else.
 */
#define SPLIT_INSERT(data, left, right)					\
	do {								\
		str = PyString_FromStringAndSize((data) + (left),	\
						 (right) - (left));	\
		if (str == NULL)					\
			goto onError;					\
		if (PyList_Insert(list, 0, str)) {			\
			Py_DECREF(str);					\
			goto onError;					\
		}							\
		Py_DECREF(str);						\
	} while (0)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333
1334static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001335split_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001337 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001338 PyObject *str;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001339 PyObject *list = PyList_New(0);
1340
1341 if (list == NULL)
1342 return NULL;
1343
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 for (i = j = 0; i < len; ) {
1345 while (i < len && isspace(Py_CHARMASK(s[i])))
1346 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 while (i < len && !isspace(Py_CHARMASK(s[i])))
1349 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001351 if (maxsplit-- <= 0)
1352 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001353 SPLIT_APPEND(s, j, i);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001354 while (i < len && isspace(Py_CHARMASK(s[i])))
1355 i++;
1356 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001357 }
1358 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 if (j < len) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001360 SPLIT_APPEND(s, j, len);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001363 onError:
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364 Py_DECREF(list);
1365 return NULL;
1366}
1367
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001368static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001369split_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001370{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001371 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001372 PyObject *str;
1373 PyObject *list = PyList_New(0);
1374
1375 if (list == NULL)
1376 return NULL;
1377
1378 for (i = j = 0; i < len; ) {
1379 if (s[i] == ch) {
1380 if (maxcount-- <= 0)
1381 break;
1382 SPLIT_APPEND(s, j, i);
1383 i = j = i + 1;
1384 } else
1385 i++;
1386 }
1387 if (j <= len) {
1388 SPLIT_APPEND(s, j, len);
1389 }
1390 return list;
1391
1392 onError:
1393 Py_DECREF(list);
1394 return NULL;
1395}
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001397PyDoc_STRVAR(split__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398"S.split([sep [,maxsplit]]) -> list of strings\n\
1399\n\
1400Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001401delimiter string. If maxsplit is given, at most maxsplit\n\
Raymond Hettingerbc552ce2002-08-05 06:28:21 +00001402splits are done. If sep is not specified or is None, any\n\
1403whitespace string is a separator.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404
1405static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001406string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001408 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1409 int err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001410 int maxsplit = -1;
1411 const char *s = PyString_AS_STRING(self), *sub;
1412 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001413
Guido van Rossum4c08d552000-03-10 22:55:18 +00001414 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001415 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001416 if (maxsplit < 0)
1417 maxsplit = INT_MAX;
1418 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001420 if (PyString_Check(subobj)) {
1421 sub = PyString_AS_STRING(subobj);
1422 n = PyString_GET_SIZE(subobj);
1423 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001424#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001425 else if (PyUnicode_Check(subobj))
1426 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001427#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001428 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1429 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001430
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001431 if (n == 0) {
1432 PyErr_SetString(PyExc_ValueError, "empty separator");
1433 return NULL;
1434 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001435 else if (n == 1)
1436 return split_char(s, len, sub[0], maxsplit);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437
1438 list = PyList_New(0);
1439 if (list == NULL)
1440 return NULL;
1441
1442 i = j = 0;
1443 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001444 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001445 if (maxsplit-- <= 0)
1446 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001447 item = PyString_FromStringAndSize(s+j, i-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001448 if (item == NULL)
1449 goto fail;
1450 err = PyList_Append(list, item);
1451 Py_DECREF(item);
1452 if (err < 0)
1453 goto fail;
1454 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455 }
1456 else
1457 i++;
1458 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001459 item = PyString_FromStringAndSize(s+j, len-j);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 if (item == NULL)
1461 goto fail;
1462 err = PyList_Append(list, item);
1463 Py_DECREF(item);
1464 if (err < 0)
1465 goto fail;
1466
1467 return list;
1468
1469 fail:
1470 Py_DECREF(list);
1471 return NULL;
1472}
1473
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001474static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001475rsplit_whitespace(const char *s, Py_ssize_t len, int maxsplit)
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001476{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001477 Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001478 PyObject *str;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001479 PyObject *list = PyList_New(0);
1480
1481 if (list == NULL)
1482 return NULL;
1483
1484 for (i = j = len - 1; i >= 0; ) {
1485 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1486 i--;
1487 j = i;
1488 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
1489 i--;
1490 if (j > i) {
1491 if (maxsplit-- <= 0)
1492 break;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001493 SPLIT_INSERT(s, i + 1, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001494 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
1495 i--;
1496 j = i;
1497 }
1498 }
1499 if (j >= 0) {
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001500 SPLIT_INSERT(s, 0, j + 1);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001501 }
1502 return list;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001503 onError:
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001504 Py_DECREF(list);
1505 return NULL;
1506}
1507
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001508static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00001509rsplit_char(const char *s, Py_ssize_t len, char ch, int maxcount)
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001510{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001511 register Py_ssize_t i, j;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001512 PyObject *str;
1513 PyObject *list = PyList_New(0);
1514
1515 if (list == NULL)
1516 return NULL;
1517
1518 for (i = j = len - 1; i >= 0; ) {
1519 if (s[i] == ch) {
1520 if (maxcount-- <= 0)
1521 break;
1522 SPLIT_INSERT(s, i + 1, j + 1);
1523 j = i = i - 1;
1524 } else
1525 i--;
1526 }
1527 if (j >= -1) {
1528 SPLIT_INSERT(s, 0, j + 1);
1529 }
1530 return list;
1531
1532 onError:
1533 Py_DECREF(list);
1534 return NULL;
1535}
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001536
1537PyDoc_STRVAR(rsplit__doc__,
1538"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1539\n\
1540Return a list of the words in the string S, using sep as the\n\
1541delimiter string, starting at the end of the string and working\n\
1542to the front. If maxsplit is given, at most maxsplit splits are\n\
1543done. If sep is not specified or is None, any whitespace string\n\
1544is a separator.");
1545
1546static PyObject *
1547string_rsplit(PyStringObject *self, PyObject *args)
1548{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001549 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1550 int err;
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001551 int maxsplit = -1;
1552 const char *s = PyString_AS_STRING(self), *sub;
1553 PyObject *list, *item, *subobj = Py_None;
1554
1555 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
1556 return NULL;
1557 if (maxsplit < 0)
1558 maxsplit = INT_MAX;
1559 if (subobj == Py_None)
1560 return rsplit_whitespace(s, len, maxsplit);
1561 if (PyString_Check(subobj)) {
1562 sub = PyString_AS_STRING(subobj);
1563 n = PyString_GET_SIZE(subobj);
1564 }
1565#ifdef Py_USING_UNICODE
1566 else if (PyUnicode_Check(subobj))
1567 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1568#endif
1569 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1570 return NULL;
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001571
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001572 if (n == 0) {
1573 PyErr_SetString(PyExc_ValueError, "empty separator");
1574 return NULL;
1575 }
Hye-Shik Chang75c00ef2004-01-05 00:29:51 +00001576 else if (n == 1)
1577 return rsplit_char(s, len, sub[0], maxsplit);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001578
1579 list = PyList_New(0);
1580 if (list == NULL)
1581 return NULL;
1582
1583 j = len;
1584 i = j - n;
1585 while (i >= 0) {
1586 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
1587 if (maxsplit-- <= 0)
1588 break;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001589 item = PyString_FromStringAndSize(s+i+n, j-i-n);
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00001590 if (item == NULL)
1591 goto fail;
1592 err = PyList_Insert(list, 0, item);
1593 Py_DECREF(item);
1594 if (err < 0)
1595 goto fail;
1596 j = i;
1597 i -= n;
1598 }
1599 else
1600 i--;
1601 }
1602 item = PyString_FromStringAndSize(s, j);
1603 if (item == NULL)
1604 goto fail;
1605 err = PyList_Insert(list, 0, item);
1606 Py_DECREF(item);
1607 if (err < 0)
1608 goto fail;
1609
1610 return list;
1611
1612 fail:
1613 Py_DECREF(list);
1614 return NULL;
1615}
1616
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001617
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001618PyDoc_STRVAR(join__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001619"S.join(sequence) -> string\n\
1620\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621Return a string which is the concatenation of the strings in the\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001622sequence. The separator between elements is S.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623
1624static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001625string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626{
1627 char *sep = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001628 const Py_ssize_t seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001629 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 char *p;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001631 Py_ssize_t seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001632 size_t sz = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001633 Py_ssize_t i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001634 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001635
Tim Peters19fe14e2001-01-19 03:03:47 +00001636 seq = PySequence_Fast(orig, "");
1637 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001638 return NULL;
1639 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001640
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001641 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001642 if (seqlen == 0) {
1643 Py_DECREF(seq);
1644 return PyString_FromString("");
1645 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001646 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001647 item = PySequence_Fast_GET_ITEM(seq, 0);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001648 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1649 Py_INCREF(item);
Tim Peters19fe14e2001-01-19 03:03:47 +00001650 Py_DECREF(seq);
Raymond Hettinger674f2412004-08-23 23:23:54 +00001651 return item;
Tim Peters19fe14e2001-01-19 03:03:47 +00001652 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001653 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001654
Raymond Hettinger674f2412004-08-23 23:23:54 +00001655 /* There are at least two things to join, or else we have a subclass
Tim Petersae1d0c92006-03-17 03:29:34 +00001656 * of the builtin types in the sequence.
Raymond Hettinger674f2412004-08-23 23:23:54 +00001657 * Do a pre-pass to figure out the total amount of space we'll
1658 * need (sz), see whether any argument is absurd, and defer to
1659 * the Unicode join if appropriate.
Tim Peters19fe14e2001-01-19 03:03:47 +00001660 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001661 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001662 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001663 item = PySequence_Fast_GET_ITEM(seq, i);
1664 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001665#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001666 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001667 /* Defer to Unicode join.
1668 * CAUTION: There's no gurantee that the
1669 * original sequence can be iterated over
1670 * again, so we must pass seq here.
1671 */
1672 PyObject *result;
1673 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001674 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001675 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001676 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001677#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001678 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001679 "sequence item %i: expected string,"
1680 " %.80s found",
Martin v. Löwis18e16552006-02-15 17:27:45 +00001681 /*XXX*/(int)i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001682 Py_DECREF(seq);
1683 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001684 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001685 sz += PyString_GET_SIZE(item);
1686 if (i != 0)
1687 sz += seplen;
1688 if (sz < old_sz || sz > INT_MAX) {
1689 PyErr_SetString(PyExc_OverflowError,
1690 "join() is too long for a Python string");
1691 Py_DECREF(seq);
1692 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001693 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001694 }
1695
1696 /* Allocate result space. */
Martin v. Löwis18e16552006-02-15 17:27:45 +00001697 res = PyString_FromStringAndSize((char*)NULL, sz);
Tim Peters19fe14e2001-01-19 03:03:47 +00001698 if (res == NULL) {
1699 Py_DECREF(seq);
1700 return NULL;
1701 }
1702
1703 /* Catenate everything. */
1704 p = PyString_AS_STRING(res);
1705 for (i = 0; i < seqlen; ++i) {
1706 size_t n;
1707 item = PySequence_Fast_GET_ITEM(seq, i);
1708 n = PyString_GET_SIZE(item);
1709 memcpy(p, PyString_AS_STRING(item), n);
1710 p += n;
1711 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001712 memcpy(p, sep, seplen);
1713 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001714 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001716
Jeremy Hylton49048292000-07-11 03:28:17 +00001717 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001718 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719}
1720
Tim Peters52e155e2001-06-16 05:42:57 +00001721PyObject *
1722_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001723{
Tim Petersa7259592001-06-16 05:11:17 +00001724 assert(sep != NULL && PyString_Check(sep));
1725 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001726 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001727}
1728
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001729static void
Martin v. Löwis18e16552006-02-15 17:27:45 +00001730string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001731{
1732 if (*end > len)
1733 *end = len;
1734 else if (*end < 0)
1735 *end += len;
1736 if (*end < 0)
1737 *end = 0;
1738 if (*start < 0)
1739 *start += len;
1740 if (*start < 0)
1741 *start = 0;
1742}
1743
Martin v. Löwis18e16552006-02-15 17:27:45 +00001744static Py_ssize_t
Fred Drakeba096332000-07-09 07:04:36 +00001745string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001747 const char *s = PyString_AS_STRING(self), *sub;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001748 Py_ssize_t len = PyString_GET_SIZE(self);
1749 Py_ssize_t n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751
Martin v. Löwis18e16552006-02-15 17:27:45 +00001752 /* XXX ssize_t i */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001753 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001754 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 return -2;
1756 if (PyString_Check(subobj)) {
1757 sub = PyString_AS_STRING(subobj);
1758 n = PyString_GET_SIZE(subobj);
1759 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001760#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001761 else if (PyUnicode_Check(subobj))
Guido van Rossum76afbd92002-08-20 17:29:29 +00001762 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001763#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001764 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001765 return -2;
1766
Neal Norwitz1f68fc72002-06-14 00:50:42 +00001767 string_adjust_indices(&i, &last, len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001768
Guido van Rossum4c08d552000-03-10 22:55:18 +00001769 if (dir > 0) {
1770 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001771 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001772 last -= n;
1773 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001774 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001775 return (long)i;
1776 }
1777 else {
Martin v. Löwis18e16552006-02-15 17:27:45 +00001778 Py_ssize_t j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001779
Guido van Rossum4c08d552000-03-10 22:55:18 +00001780 if (n == 0 && i <= last)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001781 return last;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001782 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001783 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00001784 return j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001785 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001786
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787 return -1;
1788}
1789
1790
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001791PyDoc_STRVAR(find__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792"S.find(sub [,start [,end]]) -> int\n\
1793\n\
1794Return the lowest index in S where substring sub is found,\n\
1795such that sub is contained within s[start,end]. Optional\n\
1796arguments start and end are interpreted as in slice notation.\n\
1797\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001798Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799
1800static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001801string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001802{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001803 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 if (result == -2)
1805 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001806 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807}
1808
1809
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001810PyDoc_STRVAR(index__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001811"S.index(sub [,start [,end]]) -> int\n\
1812\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001813Like S.find() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814
1815static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001816string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001818 Py_ssize_t result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001819 if (result == -2)
1820 return NULL;
1821 if (result == -1) {
1822 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001823 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824 return NULL;
1825 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001826 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001827}
1828
1829
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001830PyDoc_STRVAR(rfind__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831"S.rfind(sub [,start [,end]]) -> int\n\
1832\n\
1833Return the highest index in S where substring sub is found,\n\
1834such that sub is contained within s[start,end]. Optional\n\
1835arguments start and end are interpreted as in slice notation.\n\
1836\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001837Return -1 on failure.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001838
1839static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001840string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001841{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001842 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001843 if (result == -2)
1844 return NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00001845 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846}
1847
1848
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001849PyDoc_STRVAR(rindex__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001850"S.rindex(sub [,start [,end]]) -> int\n\
1851\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001852Like S.rfind() but raise ValueError when the substring is not found.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853
1854static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001855string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001856{
Martin v. Löwis18e16552006-02-15 17:27:45 +00001857 Py_ssize_t result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858 if (result == -2)
1859 return NULL;
1860 if (result == -1) {
1861 PyErr_SetString(PyExc_ValueError,
Raymond Hettinger5d5e7c02003-01-15 05:32:57 +00001862 "substring not found");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001863 return NULL;
1864 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00001865 return PyInt_FromSsize_t(result);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001866}
1867
1868
1869static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001870do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1871{
1872 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001873 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001874 char *sep = PyString_AS_STRING(sepobj);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001875 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1876 Py_ssize_t i, j;
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001877
1878 i = 0;
1879 if (striptype != RIGHTSTRIP) {
1880 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1881 i++;
1882 }
1883 }
1884
1885 j = len;
1886 if (striptype != LEFTSTRIP) {
1887 do {
1888 j--;
1889 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1890 j++;
1891 }
1892
1893 if (i == 0 && j == len && PyString_CheckExact(self)) {
1894 Py_INCREF(self);
1895 return (PyObject*)self;
1896 }
1897 else
1898 return PyString_FromStringAndSize(s+i, j-i);
1899}
1900
1901
1902static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001903do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001904{
1905 char *s = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00001906 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001907
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001908 i = 0;
1909 if (striptype != RIGHTSTRIP) {
1910 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1911 i++;
1912 }
1913 }
1914
1915 j = len;
1916 if (striptype != LEFTSTRIP) {
1917 do {
1918 j--;
1919 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1920 j++;
1921 }
1922
Tim Peters8fa5dd02001-09-12 02:18:30 +00001923 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924 Py_INCREF(self);
1925 return (PyObject*)self;
1926 }
1927 else
1928 return PyString_FromStringAndSize(s+i, j-i);
1929}
1930
1931
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001932static PyObject *
1933do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1934{
1935 PyObject *sep = NULL;
1936
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001937 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001938 return NULL;
1939
1940 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001941 if (PyString_Check(sep))
1942 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001943#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001944 else if (PyUnicode_Check(sep)) {
1945 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1946 PyObject *res;
1947 if (uniself==NULL)
1948 return NULL;
1949 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1950 striptype, sep);
1951 Py_DECREF(uniself);
1952 return res;
1953 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001954#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001955 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001956 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001957#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001958 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001959#else
1960 "%s arg must be None or str",
1961#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001962 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001963 return NULL;
1964 }
1965 return do_xstrip(self, striptype, sep);
1966 }
1967
1968 return do_strip(self, striptype);
1969}
1970
1971
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001972PyDoc_STRVAR(strip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001973"S.strip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001974\n\
1975Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001976whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001977If chars is given and not None, remove characters in chars instead.\n\
1978If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001979
1980static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001981string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001982{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001983 if (PyTuple_GET_SIZE(args) == 0)
1984 return do_strip(self, BOTHSTRIP); /* Common case */
1985 else
1986 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001987}
1988
1989
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00001990PyDoc_STRVAR(lstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00001991"S.lstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001993Return a copy of the string S with leading whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00001994If chars is given and not None, remove characters in chars instead.\n\
1995If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001996
1997static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001998string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001999{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002000 if (PyTuple_GET_SIZE(args) == 0)
2001 return do_strip(self, LEFTSTRIP); /* Common case */
2002 else
2003 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002004}
2005
2006
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002007PyDoc_STRVAR(rstrip__doc__,
Neal Norwitzffe33b72003-04-10 22:35:32 +00002008"S.rstrip([chars]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002010Return a copy of the string S with trailing whitespace removed.\n\
Neal Norwitzffe33b72003-04-10 22:35:32 +00002011If chars is given and not None, remove characters in chars instead.\n\
2012If chars is unicode, S will be converted to unicode before stripping");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002013
2014static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002015string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002016{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00002017 if (PyTuple_GET_SIZE(args) == 0)
2018 return do_strip(self, RIGHTSTRIP); /* Common case */
2019 else
2020 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002021}
2022
2023
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002024PyDoc_STRVAR(lower__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025"S.lower() -> string\n\
2026\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002027Return a copy of the string S converted to lowercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002028
2029static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002030string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031{
2032 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002033 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002034 PyObject *new;
2035
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036 new = PyString_FromStringAndSize(NULL, n);
2037 if (new == NULL)
2038 return NULL;
2039 s_new = PyString_AsString(new);
2040 for (i = 0; i < n; i++) {
2041 int c = Py_CHARMASK(*s++);
2042 if (isupper(c)) {
2043 *s_new = tolower(c);
2044 } else
2045 *s_new = c;
2046 s_new++;
2047 }
2048 return new;
2049}
2050
2051
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002052PyDoc_STRVAR(upper__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002053"S.upper() -> string\n\
2054\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002055Return a copy of the string S converted to uppercase.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002056
2057static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002058string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002059{
2060 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002061 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002062 PyObject *new;
2063
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 new = PyString_FromStringAndSize(NULL, n);
2065 if (new == NULL)
2066 return NULL;
2067 s_new = PyString_AsString(new);
2068 for (i = 0; i < n; i++) {
2069 int c = Py_CHARMASK(*s++);
2070 if (islower(c)) {
2071 *s_new = toupper(c);
2072 } else
2073 *s_new = c;
2074 s_new++;
2075 }
2076 return new;
2077}
2078
2079
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002080PyDoc_STRVAR(title__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002081"S.title() -> string\n\
2082\n\
2083Return a titlecased version of S, i.e. words start with uppercase\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002084characters, all remaining cased characters have lowercase.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085
2086static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002087string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002088{
2089 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002090 Py_ssize_t i, n = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002091 int previous_is_cased = 0;
2092 PyObject *new;
2093
Guido van Rossum4c08d552000-03-10 22:55:18 +00002094 new = PyString_FromStringAndSize(NULL, n);
2095 if (new == NULL)
2096 return NULL;
2097 s_new = PyString_AsString(new);
2098 for (i = 0; i < n; i++) {
2099 int c = Py_CHARMASK(*s++);
2100 if (islower(c)) {
2101 if (!previous_is_cased)
2102 c = toupper(c);
2103 previous_is_cased = 1;
2104 } else if (isupper(c)) {
2105 if (previous_is_cased)
2106 c = tolower(c);
2107 previous_is_cased = 1;
2108 } else
2109 previous_is_cased = 0;
2110 *s_new++ = c;
2111 }
2112 return new;
2113}
2114
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002115PyDoc_STRVAR(capitalize__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002116"S.capitalize() -> string\n\
2117\n\
2118Return a copy of the string S with only its first character\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002119capitalized.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120
2121static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002122string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002123{
2124 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002125 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002126 PyObject *new;
2127
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 new = PyString_FromStringAndSize(NULL, n);
2129 if (new == NULL)
2130 return NULL;
2131 s_new = PyString_AsString(new);
2132 if (0 < n) {
2133 int c = Py_CHARMASK(*s++);
2134 if (islower(c))
2135 *s_new = toupper(c);
2136 else
2137 *s_new = c;
2138 s_new++;
2139 }
2140 for (i = 1; i < n; i++) {
2141 int c = Py_CHARMASK(*s++);
2142 if (isupper(c))
2143 *s_new = tolower(c);
2144 else
2145 *s_new = c;
2146 s_new++;
2147 }
2148 return new;
2149}
2150
2151
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002152PyDoc_STRVAR(count__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153"S.count(sub[, start[, end]]) -> int\n\
2154\n\
2155Return the number of occurrences of substring sub in string\n\
2156S[start:end]. Optional arguments start and end are\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002157interpreted as in slice notation.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158
2159static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002160string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161{
Raymond Hettinger57e74472005-02-20 09:54:53 +00002162 const char *s = PyString_AS_STRING(self), *sub, *t;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002163 Py_ssize_t len = PyString_GET_SIZE(self), n;
2164 Py_ssize_t i = 0, last = INT_MAX;
2165 Py_ssize_t m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002166 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167
Guido van Rossumc6821402000-05-08 14:08:05 +00002168 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
2169 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00002171
Guido van Rossum4c08d552000-03-10 22:55:18 +00002172 if (PyString_Check(subobj)) {
2173 sub = PyString_AS_STRING(subobj);
2174 n = PyString_GET_SIZE(subobj);
2175 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002176#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002177 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002178 Py_ssize_t count;
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002179 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
2180 if (count == -1)
2181 return NULL;
2182 else
2183 return PyInt_FromLong((long) count);
2184 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002185#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002186 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
2187 return NULL;
2188
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002189 string_adjust_indices(&i, &last, len);
2190
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002191 m = last + 1 - n;
2192 if (n == 0)
Martin v. Löwis18e16552006-02-15 17:27:45 +00002193 return PyInt_FromSsize_t(m-i);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194
2195 r = 0;
2196 while (i < m) {
2197 if (!memcmp(s+i, sub, n)) {
2198 r++;
2199 i += n;
2200 } else {
2201 i++;
2202 }
Raymond Hettinger57e74472005-02-20 09:54:53 +00002203 if (i >= m)
2204 break;
2205 t = memchr(s+i, sub[0], m-i);
2206 if (t == NULL)
2207 break;
2208 i = t - s;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209 }
Martin v. Löwis18e16552006-02-15 17:27:45 +00002210 return PyInt_FromSsize_t(r);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211}
2212
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002213PyDoc_STRVAR(swapcase__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002214"S.swapcase() -> string\n\
2215\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002216Return a copy of the string S with uppercase characters\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002217converted to lowercase and vice versa.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218
2219static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002220string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002221{
2222 char *s = PyString_AS_STRING(self), *s_new;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002223 Py_ssize_t i, n = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002224 PyObject *new;
2225
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002226 new = PyString_FromStringAndSize(NULL, n);
2227 if (new == NULL)
2228 return NULL;
2229 s_new = PyString_AsString(new);
2230 for (i = 0; i < n; i++) {
2231 int c = Py_CHARMASK(*s++);
2232 if (islower(c)) {
2233 *s_new = toupper(c);
2234 }
2235 else if (isupper(c)) {
2236 *s_new = tolower(c);
2237 }
2238 else
2239 *s_new = c;
2240 s_new++;
2241 }
2242 return new;
2243}
2244
2245
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002246PyDoc_STRVAR(translate__doc__,
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002247"S.translate(table [,deletechars]) -> string\n\
2248\n\
2249Return a copy of the string S, where all characters occurring\n\
2250in the optional argument deletechars are removed, and the\n\
2251remaining characters have been mapped through the given\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002252translation table, which must be a string of length 256.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002253
2254static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002255string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002256{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257 register char *input, *output;
2258 register const char *table;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002259 register Py_ssize_t i, c, changed = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002260 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002261 const char *table1, *output_start, *del_table=NULL;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002262 Py_ssize_t inlen, tablen, dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002263 PyObject *result;
2264 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002266
Raymond Hettingerea3fdf42002-12-29 16:33:45 +00002267 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002269 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002270
2271 if (PyString_Check(tableobj)) {
2272 table1 = PyString_AS_STRING(tableobj);
2273 tablen = PyString_GET_SIZE(tableobj);
2274 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002275#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002277 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278 parameter; instead a mapping to None will cause characters
2279 to be deleted. */
2280 if (delobj != NULL) {
2281 PyErr_SetString(PyExc_TypeError,
2282 "deletions are implemented differently for unicode");
2283 return NULL;
2284 }
2285 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2286 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002287#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002288 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002289 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002290
Martin v. Löwis00b61272002-12-12 20:03:19 +00002291 if (tablen != 256) {
2292 PyErr_SetString(PyExc_ValueError,
2293 "translation table must be 256 characters long");
2294 return NULL;
2295 }
2296
Guido van Rossum4c08d552000-03-10 22:55:18 +00002297 if (delobj != NULL) {
2298 if (PyString_Check(delobj)) {
2299 del_table = PyString_AS_STRING(delobj);
2300 dellen = PyString_GET_SIZE(delobj);
2301 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002302#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303 else if (PyUnicode_Check(delobj)) {
2304 PyErr_SetString(PyExc_TypeError,
2305 "deletions are implemented differently for unicode");
2306 return NULL;
2307 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002308#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2310 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311 }
2312 else {
2313 del_table = NULL;
2314 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002315 }
2316
2317 table = table1;
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002318 inlen = PyString_GET_SIZE(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002319 result = PyString_FromStringAndSize((char *)NULL, inlen);
2320 if (result == NULL)
2321 return NULL;
2322 output_start = output = PyString_AsString(result);
Neal Norwitz2aa9a5d2006-03-20 01:53:23 +00002323 input = PyString_AS_STRING(input_obj);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324
2325 if (dellen == 0) {
2326 /* If no deletions are required, use faster code */
2327 for (i = inlen; --i >= 0; ) {
2328 c = Py_CHARMASK(*input++);
2329 if (Py_CHARMASK((*output++ = table[c])) != c)
2330 changed = 1;
2331 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002332 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002333 return result;
2334 Py_DECREF(result);
2335 Py_INCREF(input_obj);
2336 return input_obj;
2337 }
2338
2339 for (i = 0; i < 256; i++)
2340 trans_table[i] = Py_CHARMASK(table[i]);
2341
2342 for (i = 0; i < dellen; i++)
2343 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2344
2345 for (i = inlen; --i >= 0; ) {
2346 c = Py_CHARMASK(*input++);
2347 if (trans_table[c] != -1)
2348 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2349 continue;
2350 changed = 1;
2351 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00002352 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002353 Py_DECREF(result);
2354 Py_INCREF(input_obj);
2355 return input_obj;
2356 }
2357 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00002358 if (inlen > 0)
2359 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 return result;
2361}
2362
2363
2364/* What follows is used for implementing replace(). Perry Stoll. */
2365
2366/*
2367 mymemfind
2368
2369 strstr replacement for arbitrary blocks of memory.
2370
Barry Warsaw51ac5802000-03-20 16:36:48 +00002371 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002372 contents of memory pointed to by PAT. Returns the index into MEM if
2373 found, or -1 if not found. If len of PAT is greater than length of
2374 MEM, the function returns -1.
2375*/
Martin v. Löwis18e16552006-02-15 17:27:45 +00002376static Py_ssize_t
2377mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002378{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002379 register Py_ssize_t ii;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002380
2381 /* pattern can not occur in the last pat_len-1 chars */
2382 len -= pat_len;
2383
2384 for (ii = 0; ii <= len; ii++) {
Fred Drake396f6e02000-06-20 15:47:54 +00002385 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002386 return ii;
2387 }
2388 }
2389 return -1;
2390}
2391
2392/*
2393 mymemcnt
2394
2395 Return the number of distinct times PAT is found in MEM.
2396 meaning mem=1111 and pat==11 returns 2.
2397 mem=11111 and pat==11 also return 2.
2398 */
Martin v. Löwis18e16552006-02-15 17:27:45 +00002399static Py_ssize_t
2400mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002401{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002402 register Py_ssize_t offset = 0;
2403 Py_ssize_t nfound = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002404
2405 while (len >= 0) {
2406 offset = mymemfind(mem, len, pat, pat_len);
2407 if (offset == -1)
2408 break;
2409 mem += offset + pat_len;
2410 len -= offset + pat_len;
2411 nfound++;
2412 }
2413 return nfound;
2414}
2415
2416/*
2417 mymemreplace
2418
Thomas Wouters7e474022000-07-16 12:04:32 +00002419 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002420 replaced with SUB.
2421
Thomas Wouters7e474022000-07-16 12:04:32 +00002422 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002423 of PAT in STR, then the original string is returned. Otherwise, a new
2424 string is allocated here and returned.
2425
2426 on return, out_len is:
2427 the length of output string, or
2428 -1 if the input string is returned, or
2429 unchanged if an error occurs (no memory).
2430
2431 return value is:
2432 the new string allocated locally, or
2433 NULL if an error occurred.
2434*/
2435static char *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002436mymemreplace(const char *str, Py_ssize_t len, /* input string */
2437 const char *pat, Py_ssize_t pat_len, /* pattern string to find */
2438 const char *sub, Py_ssize_t sub_len, /* substitution string */
2439 Py_ssize_t count, /* number of replacements */
2440 Py_ssize_t *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002441{
2442 char *out_s;
2443 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002444 Py_ssize_t nfound, offset, new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002445
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002446 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002447 goto return_same;
2448
2449 /* find length of output string */
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002450 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
Tim Peters9c012af2001-05-10 00:32:57 +00002451 if (count < 0)
2452 count = INT_MAX;
2453 else if (nfound > count)
2454 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002455 if (nfound == 0)
2456 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002457
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002458 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002459 if (new_len == 0) {
2460 /* Have to allocate something for the caller to free(). */
2461 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00002462 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00002463 return NULL;
2464 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002465 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002466 else {
2467 assert(new_len > 0);
2468 new_s = (char *)PyMem_MALLOC(new_len);
2469 if (new_s == NULL)
2470 return NULL;
2471 out_s = new_s;
2472
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002473 if (pat_len > 0) {
2474 for (; nfound > 0; --nfound) {
2475 /* find index of next instance of pattern */
2476 offset = mymemfind(str, len, pat, pat_len);
2477 if (offset == -1)
2478 break;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002479
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002480 /* copy non matching part of input string */
2481 memcpy(new_s, str, offset);
2482 str += offset + pat_len;
2483 len -= offset + pat_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002484
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002485 /* copy substitute into the output string */
2486 new_s += offset;
2487 memcpy(new_s, sub, sub_len);
2488 new_s += sub_len;
2489 }
2490 /* copy any remaining values into output string */
2491 if (len > 0)
2492 memcpy(new_s, str, len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00002493 }
Guido van Rossum8b1a6d62002-08-23 18:21:28 +00002494 else {
2495 for (;;++str, --len) {
2496 memcpy(new_s, sub, sub_len);
2497 new_s += sub_len;
2498 if (--nfound <= 0) {
2499 memcpy(new_s, str, len);
2500 break;
2501 }
2502 *new_s++ = *str;
2503 }
2504 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00002505 }
2506 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002507 return out_s;
2508
2509 return_same:
2510 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00002511 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002512}
2513
2514
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002515PyDoc_STRVAR(replace__doc__,
Fred Draked22bb652003-10-22 02:56:40 +00002516"S.replace (old, new[, count]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002517\n\
2518Return a copy of string S with all occurrences of substring\n\
Fred Draked22bb652003-10-22 02:56:40 +00002519old replaced by new. If the optional argument count is\n\
2520given, only the first count occurrences are replaced.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521
2522static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002523string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002524{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525 const char *str = PyString_AS_STRING(self), *sub, *repl;
2526 char *new_s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002527 const Py_ssize_t len = PyString_GET_SIZE(self);
2528 Py_ssize_t sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002529 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002530 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002531 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002532
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533 if (!PyArg_ParseTuple(args, "OO|i:replace",
2534 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002535 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002536
2537 if (PyString_Check(subobj)) {
2538 sub = PyString_AS_STRING(subobj);
2539 sub_len = PyString_GET_SIZE(subobj);
2540 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002541#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002543 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002545#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2547 return NULL;
2548
2549 if (PyString_Check(replobj)) {
2550 repl = PyString_AS_STRING(replobj);
2551 repl_len = PyString_GET_SIZE(replobj);
2552 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002553#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002554 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002555 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002556 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002557#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2559 return NULL;
2560
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002562 if (new_s == NULL) {
2563 PyErr_NoMemory();
2564 return NULL;
2565 }
2566 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002567 if (PyString_CheckExact(self)) {
2568 /* we're returning another reference to self */
2569 new = (PyObject*)self;
2570 Py_INCREF(new);
2571 }
2572 else {
2573 new = PyString_FromStringAndSize(str, len);
2574 if (new == NULL)
2575 return NULL;
2576 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002577 }
2578 else {
2579 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002580 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002581 }
2582 return new;
2583}
2584
2585
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002586PyDoc_STRVAR(startswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002587"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002588\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002589Return True if S starts with the specified prefix, False otherwise.\n\
2590With optional start, test S beginning at that position.\n\
2591With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002592
2593static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002594string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002595{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002596 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002597 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002598 const char* prefix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002599 Py_ssize_t plen;
2600 Py_ssize_t start = 0;
2601 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002602 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002603
Guido van Rossumc6821402000-05-08 14:08:05 +00002604 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2605 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002606 return NULL;
2607 if (PyString_Check(subobj)) {
2608 prefix = PyString_AS_STRING(subobj);
2609 plen = PyString_GET_SIZE(subobj);
2610 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002611#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002612 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002613 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002614 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002615 subobj, start, end, -1);
2616 if (rc == -1)
2617 return NULL;
2618 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002619 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002620 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002621#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002622 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002623 return NULL;
2624
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002625 string_adjust_indices(&start, &end, len);
2626
2627 if (start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002628 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002629
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002630 if (end-start >= plen)
2631 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
2632 else
2633 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002634}
2635
2636
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002637PyDoc_STRVAR(endswith__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00002638"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002639\n\
Guido van Rossuma7132182003-04-09 19:32:45 +00002640Return True if S ends with the specified suffix, False otherwise.\n\
2641With optional start, test S beginning at that position.\n\
2642With optional end, stop comparing S at that position.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002643
2644static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002645string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002646{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002647 const char* str = PyString_AS_STRING(self);
Martin v. Löwis18e16552006-02-15 17:27:45 +00002648 Py_ssize_t len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002649 const char* suffix;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002650 Py_ssize_t slen;
2651 Py_ssize_t start = 0;
2652 Py_ssize_t end = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002653 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002654
Guido van Rossumc6821402000-05-08 14:08:05 +00002655 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2656 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002657 return NULL;
2658 if (PyString_Check(subobj)) {
2659 suffix = PyString_AS_STRING(subobj);
2660 slen = PyString_GET_SIZE(subobj);
2661 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002662#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002663 else if (PyUnicode_Check(subobj)) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00002664 Py_ssize_t rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002665 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002666 subobj, start, end, +1);
2667 if (rc == -1)
2668 return NULL;
2669 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002670 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002671 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002672#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002673 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002674 return NULL;
2675
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002676 string_adjust_indices(&start, &end, len);
2677
2678 if (end-start < slen || start > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002679 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002680
Neal Norwitz1f68fc72002-06-14 00:50:42 +00002681 if (end-slen > start)
2682 start = end - slen;
2683 if (end-start >= slen)
2684 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
2685 else
2686 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002687}
2688
2689
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002690PyDoc_STRVAR(encode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002691"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002692\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002693Encodes S using the codec registered for encoding. encoding defaults\n\
2694to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002695handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002696a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
2697'xmlcharrefreplace' as well as any other name registered with\n\
2698codecs.register_error that is able to handle UnicodeEncodeErrors.");
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002699
2700static PyObject *
2701string_encode(PyStringObject *self, PyObject *args)
2702{
2703 char *encoding = NULL;
2704 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002705 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00002706
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002707 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2708 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002709 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002710 if (v == NULL)
2711 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002712 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2713 PyErr_Format(PyExc_TypeError,
2714 "encoder did not return a string/unicode object "
2715 "(type=%.400s)",
2716 v->ob_type->tp_name);
2717 Py_DECREF(v);
2718 return NULL;
2719 }
2720 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002721
2722 onError:
2723 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002724}
2725
2726
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002727PyDoc_STRVAR(decode__doc__,
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002728"S.decode([encoding[,errors]]) -> object\n\
2729\n\
2730Decodes S using the codec registered for encoding. encoding defaults\n\
2731to the default encoding. errors may be given to set a different error\n\
2732handling scheme. Default is 'strict' meaning that encoding errors raise\n\
Walter Dörwald3aeb6322002-09-02 13:14:32 +00002733a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
2734as well as any other name registerd with codecs.register_error that is\n\
2735able to handle UnicodeDecodeErrors.");
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002736
2737static PyObject *
2738string_decode(PyStringObject *self, PyObject *args)
2739{
2740 char *encoding = NULL;
2741 char *errors = NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002742 PyObject *v;
Tim Petersae1d0c92006-03-17 03:29:34 +00002743
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002744 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2745 return NULL;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002746 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002747 if (v == NULL)
2748 goto onError;
Marc-André Lemburgd2d45982004-07-08 17:57:32 +00002749 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
2750 PyErr_Format(PyExc_TypeError,
2751 "decoder did not return a string/unicode object "
2752 "(type=%.400s)",
2753 v->ob_type->tp_name);
2754 Py_DECREF(v);
2755 return NULL;
2756 }
2757 return v;
Marc-André Lemburg1dffb122004-07-08 19:13:55 +00002758
2759 onError:
2760 return NULL;
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002761}
2762
2763
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002764PyDoc_STRVAR(expandtabs__doc__,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002765"S.expandtabs([tabsize]) -> string\n\
2766\n\
2767Return a copy of S where all tab characters are expanded using spaces.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002768If tabsize is not given, a tab size of 8 characters is assumed.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002769
2770static PyObject*
2771string_expandtabs(PyStringObject *self, PyObject *args)
2772{
2773 const char *e, *p;
2774 char *q;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002775 Py_ssize_t i, j;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002776 PyObject *u;
2777 int tabsize = 8;
2778
2779 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2780 return NULL;
2781
Thomas Wouters7e474022000-07-16 12:04:32 +00002782 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002783 i = j = 0;
2784 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2785 for (p = PyString_AS_STRING(self); p < e; p++)
2786 if (*p == '\t') {
2787 if (tabsize > 0)
2788 j += tabsize - (j % tabsize);
2789 }
2790 else {
2791 j++;
2792 if (*p == '\n' || *p == '\r') {
2793 i += j;
2794 j = 0;
2795 }
2796 }
2797
2798 /* Second pass: create output string and fill it */
2799 u = PyString_FromStringAndSize(NULL, i + j);
2800 if (!u)
2801 return NULL;
2802
2803 j = 0;
2804 q = PyString_AS_STRING(u);
2805
2806 for (p = PyString_AS_STRING(self); p < e; p++)
2807 if (*p == '\t') {
2808 if (tabsize > 0) {
2809 i = tabsize - (j % tabsize);
2810 j += i;
2811 while (i--)
2812 *q++ = ' ';
2813 }
2814 }
2815 else {
2816 j++;
2817 *q++ = *p;
2818 if (*p == '\n' || *p == '\r')
2819 j = 0;
2820 }
2821
2822 return u;
2823}
2824
Tim Peters8fa5dd02001-09-12 02:18:30 +00002825static PyObject *
Martin v. Löwis18e16552006-02-15 17:27:45 +00002826pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002827{
2828 PyObject *u;
2829
2830 if (left < 0)
2831 left = 0;
2832 if (right < 0)
2833 right = 0;
2834
Tim Peters8fa5dd02001-09-12 02:18:30 +00002835 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002836 Py_INCREF(self);
2837 return (PyObject *)self;
2838 }
2839
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002840 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002841 left + PyString_GET_SIZE(self) + right);
2842 if (u) {
2843 if (left)
2844 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002845 memcpy(PyString_AS_STRING(u) + left,
2846 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002847 PyString_GET_SIZE(self));
2848 if (right)
2849 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2850 fill, right);
2851 }
2852
2853 return u;
2854}
2855
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002856PyDoc_STRVAR(ljust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002857"S.ljust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002858"\n"
2859"Return S left justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002860"done using the specified fill character (default is a space).");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002861
2862static PyObject *
2863string_ljust(PyStringObject *self, PyObject *args)
2864{
2865 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002866 char fillchar = ' ';
2867
2868 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002869 return NULL;
2870
Tim Peters8fa5dd02001-09-12 02:18:30 +00002871 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002872 Py_INCREF(self);
2873 return (PyObject*) self;
2874 }
2875
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002876 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002877}
2878
2879
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002880PyDoc_STRVAR(rjust__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002881"S.rjust(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002882"\n"
2883"Return S right justified in a string of length width. Padding is\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002884"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002885
2886static PyObject *
2887string_rjust(PyStringObject *self, PyObject *args)
2888{
2889 int width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002890 char fillchar = ' ';
2891
2892 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002893 return NULL;
2894
Tim Peters8fa5dd02001-09-12 02:18:30 +00002895 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002896 Py_INCREF(self);
2897 return (PyObject*) self;
2898 }
2899
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002900 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002901}
2902
2903
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002904PyDoc_STRVAR(center__doc__,
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002905"S.center(width[, fillchar]) -> string\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002906"\n"
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002907"Return S centered in a string of length width. Padding is\n"
2908"done using the specified fill character (default is a space)");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002909
2910static PyObject *
2911string_center(PyStringObject *self, PyObject *args)
2912{
Martin v. Löwis18e16552006-02-15 17:27:45 +00002913 Py_ssize_t marg, left;
2914 long width;
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002915 char fillchar = ' ';
Guido van Rossum4c08d552000-03-10 22:55:18 +00002916
Martin v. Löwis18e16552006-02-15 17:27:45 +00002917 if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002918 return NULL;
2919
Tim Peters8fa5dd02001-09-12 02:18:30 +00002920 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002921 Py_INCREF(self);
2922 return (PyObject*) self;
2923 }
2924
2925 marg = width - PyString_GET_SIZE(self);
2926 left = marg / 2 + (marg & width & 1);
2927
Raymond Hettinger4f8f9762003-11-26 08:21:35 +00002928 return pad(self, left, marg - left, fillchar);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002929}
2930
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002931PyDoc_STRVAR(zfill__doc__,
Walter Dörwald068325e2002-04-15 13:36:47 +00002932"S.zfill(width) -> string\n"
2933"\n"
2934"Pad a numeric string S with zeros on the left, to fill a field\n"
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002935"of the specified width. The string S is never truncated.");
Walter Dörwald068325e2002-04-15 13:36:47 +00002936
2937static PyObject *
2938string_zfill(PyStringObject *self, PyObject *args)
2939{
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002940 Py_ssize_t fill;
Walter Dörwald068325e2002-04-15 13:36:47 +00002941 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002942 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002943
Martin v. Löwiseb079f12006-02-16 14:32:27 +00002944 long width;
Martin v. Löwis18e16552006-02-15 17:27:45 +00002945 if (!PyArg_ParseTuple(args, "l:zfill", &width))
Walter Dörwald068325e2002-04-15 13:36:47 +00002946 return NULL;
2947
2948 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002949 if (PyString_CheckExact(self)) {
2950 Py_INCREF(self);
2951 return (PyObject*) self;
2952 }
2953 else
2954 return PyString_FromStringAndSize(
2955 PyString_AS_STRING(self),
2956 PyString_GET_SIZE(self)
2957 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002958 }
2959
2960 fill = width - PyString_GET_SIZE(self);
2961
2962 s = pad(self, fill, 0, '0');
2963
2964 if (s == NULL)
2965 return NULL;
2966
2967 p = PyString_AS_STRING(s);
2968 if (p[fill] == '+' || p[fill] == '-') {
2969 /* move sign to beginning of string */
2970 p[0] = p[fill];
2971 p[fill] = '0';
2972 }
2973
2974 return (PyObject*) s;
2975}
2976
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00002977PyDoc_STRVAR(isspace__doc__,
Martin v. Löwis6828e182003-10-18 09:55:08 +00002978"S.isspace() -> bool\n\
2979\n\
2980Return True if all characters in S are whitespace\n\
2981and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00002982
2983static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002984string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002985{
Fred Drakeba096332000-07-09 07:04:36 +00002986 register const unsigned char *p
2987 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002988 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002989
Guido van Rossum4c08d552000-03-10 22:55:18 +00002990 /* Shortcut for single character strings */
2991 if (PyString_GET_SIZE(self) == 1 &&
2992 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002993 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002994
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002995 /* Special case for empty strings */
2996 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002997 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002998
Guido van Rossum4c08d552000-03-10 22:55:18 +00002999 e = p + PyString_GET_SIZE(self);
3000 for (; p < e; p++) {
3001 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003002 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003003 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003004 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003005}
3006
3007
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003008PyDoc_STRVAR(isalpha__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003009"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003010\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003011Return True if all characters in S are alphabetic\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003012and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003013
3014static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003015string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003016{
Fred Drakeba096332000-07-09 07:04:36 +00003017 register const unsigned char *p
3018 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003019 register const unsigned char *e;
3020
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003021 /* Shortcut for single character strings */
3022 if (PyString_GET_SIZE(self) == 1 &&
3023 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003024 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003025
3026 /* Special case for empty strings */
3027 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003028 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003029
3030 e = p + PyString_GET_SIZE(self);
3031 for (; p < e; p++) {
3032 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003033 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003034 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003035 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003036}
3037
3038
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003039PyDoc_STRVAR(isalnum__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003040"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003041\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003042Return True if all characters in S are alphanumeric\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003043and there is at least one character in S, False otherwise.");
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003044
3045static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003046string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003047{
Fred Drakeba096332000-07-09 07:04:36 +00003048 register const unsigned char *p
3049 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003050 register const unsigned char *e;
3051
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003052 /* Shortcut for single character strings */
3053 if (PyString_GET_SIZE(self) == 1 &&
3054 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003055 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003056
3057 /* Special case for empty strings */
3058 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003059 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003060
3061 e = p + PyString_GET_SIZE(self);
3062 for (; p < e; p++) {
3063 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003064 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003065 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003066 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00003067}
3068
3069
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003070PyDoc_STRVAR(isdigit__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003071"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003072\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003073Return True if all characters in S are digits\n\
3074and there is at least one character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003075
3076static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003077string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003078{
Fred Drakeba096332000-07-09 07:04:36 +00003079 register const unsigned char *p
3080 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003081 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003082
Guido van Rossum4c08d552000-03-10 22:55:18 +00003083 /* Shortcut for single character strings */
3084 if (PyString_GET_SIZE(self) == 1 &&
3085 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003086 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003087
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003088 /* Special case for empty strings */
3089 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003090 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003091
Guido van Rossum4c08d552000-03-10 22:55:18 +00003092 e = p + PyString_GET_SIZE(self);
3093 for (; p < e; p++) {
3094 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003095 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003096 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003097 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003098}
3099
3100
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003101PyDoc_STRVAR(islower__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003102"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003103\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00003104Return True if all cased characters in S are lowercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003105at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003106
3107static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003108string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003109{
Fred Drakeba096332000-07-09 07:04:36 +00003110 register const unsigned char *p
3111 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003112 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003113 int cased;
3114
Guido van Rossum4c08d552000-03-10 22:55:18 +00003115 /* Shortcut for single character strings */
3116 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003117 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003118
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003119 /* Special case for empty strings */
3120 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003121 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003122
Guido van Rossum4c08d552000-03-10 22:55:18 +00003123 e = p + PyString_GET_SIZE(self);
3124 cased = 0;
3125 for (; p < e; p++) {
3126 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003127 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003128 else if (!cased && islower(*p))
3129 cased = 1;
3130 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003131 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003132}
3133
3134
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003135PyDoc_STRVAR(isupper__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003136"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003137\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003138Return True if all cased characters in S are uppercase and there is\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003139at least one cased character in S, False otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003140
3141static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003142string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003143{
Fred Drakeba096332000-07-09 07:04:36 +00003144 register const unsigned char *p
3145 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003146 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003147 int cased;
3148
Guido van Rossum4c08d552000-03-10 22:55:18 +00003149 /* Shortcut for single character strings */
3150 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003151 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003152
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003153 /* Special case for empty strings */
3154 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003155 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003156
Guido van Rossum4c08d552000-03-10 22:55:18 +00003157 e = p + PyString_GET_SIZE(self);
3158 cased = 0;
3159 for (; p < e; p++) {
3160 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00003161 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003162 else if (!cased && isupper(*p))
3163 cased = 1;
3164 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003165 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003166}
3167
3168
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003169PyDoc_STRVAR(istitle__doc__,
Guido van Rossum77f6a652002-04-03 22:41:51 +00003170"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003171\n\
Martin v. Löwis6828e182003-10-18 09:55:08 +00003172Return True if S is a titlecased string and there is at least one\n\
3173character in S, i.e. uppercase characters may only follow uncased\n\
3174characters and lowercase characters only cased ones. Return False\n\
3175otherwise.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003176
3177static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003178string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00003179{
Fred Drakeba096332000-07-09 07:04:36 +00003180 register const unsigned char *p
3181 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003182 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003183 int cased, previous_is_cased;
3184
Guido van Rossum4c08d552000-03-10 22:55:18 +00003185 /* Shortcut for single character strings */
3186 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003187 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003188
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003189 /* Special case for empty strings */
3190 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003191 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00003192
Guido van Rossum4c08d552000-03-10 22:55:18 +00003193 e = p + PyString_GET_SIZE(self);
3194 cased = 0;
3195 previous_is_cased = 0;
3196 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00003197 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003198
3199 if (isupper(ch)) {
3200 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003201 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003202 previous_is_cased = 1;
3203 cased = 1;
3204 }
3205 else if (islower(ch)) {
3206 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00003207 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003208 previous_is_cased = 1;
3209 cased = 1;
3210 }
3211 else
3212 previous_is_cased = 0;
3213 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00003214 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003215}
3216
3217
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003218PyDoc_STRVAR(splitlines__doc__,
Fred Drake2bae4fa2001-10-13 15:57:55 +00003219"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00003220\n\
3221Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003222Line breaks are not included in the resulting list unless keepends\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003223is given and true.");
Guido van Rossum4c08d552000-03-10 22:55:18 +00003224
Guido van Rossum4c08d552000-03-10 22:55:18 +00003225static PyObject*
3226string_splitlines(PyStringObject *self, PyObject *args)
3227{
Martin v. Löwis18e16552006-02-15 17:27:45 +00003228 register Py_ssize_t i;
3229 register Py_ssize_t j;
3230 Py_ssize_t len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003231 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003232 PyObject *list;
3233 PyObject *str;
3234 char *data;
3235
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003236 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00003237 return NULL;
3238
3239 data = PyString_AS_STRING(self);
3240 len = PyString_GET_SIZE(self);
3241
Guido van Rossum4c08d552000-03-10 22:55:18 +00003242 list = PyList_New(0);
3243 if (!list)
3244 goto onError;
3245
3246 for (i = j = 0; i < len; ) {
Martin v. Löwis18e16552006-02-15 17:27:45 +00003247 Py_ssize_t eol;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003248
Guido van Rossum4c08d552000-03-10 22:55:18 +00003249 /* Find a line and append it */
3250 while (i < len && data[i] != '\n' && data[i] != '\r')
3251 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003252
3253 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003254 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003255 if (i < len) {
3256 if (data[i] == '\r' && i + 1 < len &&
3257 data[i+1] == '\n')
3258 i += 2;
3259 else
3260 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003261 if (keepends)
3262 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00003263 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003264 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003265 j = i;
3266 }
3267 if (j < len) {
3268 SPLIT_APPEND(data, j, len);
3269 }
3270
3271 return list;
3272
3273 onError:
Hye-Shik Chang4af5c8c2006-03-07 15:39:21 +00003274 Py_XDECREF(list);
Guido van Rossum4c08d552000-03-10 22:55:18 +00003275 return NULL;
3276}
3277
3278#undef SPLIT_APPEND
3279
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003280static PyObject *
3281string_getnewargs(PyStringObject *v)
3282{
3283 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3284}
3285
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003286
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003287static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003288string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00003289 /* Counterparts of the obsolete stropmodule functions; except
3290 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003291 {"join", (PyCFunction)string_join, METH_O, join__doc__},
3292 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
Hye-Shik Chang3ae811b2003-12-15 18:49:53 +00003293 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003294 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3295 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00003296 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3297 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3298 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3299 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3300 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3301 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3302 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003303 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3304 capitalize__doc__},
3305 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3306 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3307 endswith__doc__},
3308 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3309 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3310 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3311 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3312 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3313 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3314 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3315 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3316 startswith__doc__},
3317 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3318 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3319 swapcase__doc__},
3320 {"translate", (PyCFunction)string_translate, METH_VARARGS,
3321 translate__doc__},
3322 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3323 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3324 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3325 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3326 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3327 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3328 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3329 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3330 expandtabs__doc__},
3331 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3332 splitlines__doc__},
Guido van Rossum5d9113d2003-01-29 17:58:45 +00003333 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003334 {NULL, NULL} /* sentinel */
3335};
3336
Jeremy Hylton938ace62002-07-17 16:30:39 +00003337static PyObject *
Guido van Rossumae960af2001-08-30 03:11:59 +00003338str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3339
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003340static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00003341string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003342{
Tim Peters6d6c1a32001-08-02 04:15:00 +00003343 PyObject *x = NULL;
Martin v. Löwis15e62742006-02-27 16:46:16 +00003344 static char *kwlist[] = {"object", 0};
Tim Peters6d6c1a32001-08-02 04:15:00 +00003345
Guido van Rossumae960af2001-08-30 03:11:59 +00003346 if (type != &PyString_Type)
3347 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00003348 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3349 return NULL;
3350 if (x == NULL)
3351 return PyString_FromString("");
3352 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003353}
3354
Guido van Rossumae960af2001-08-30 03:11:59 +00003355static PyObject *
3356str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3357{
Tim Petersaf90b3e2001-09-12 05:18:58 +00003358 PyObject *tmp, *pnew;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003359 Py_ssize_t n;
Guido van Rossumae960af2001-08-30 03:11:59 +00003360
3361 assert(PyType_IsSubtype(type, &PyString_Type));
3362 tmp = string_new(&PyString_Type, args, kwds);
3363 if (tmp == NULL)
3364 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00003365 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00003366 n = PyString_GET_SIZE(tmp);
3367 pnew = type->tp_alloc(type, n);
3368 if (pnew != NULL) {
3369 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003370 ((PyStringObject *)pnew)->ob_shash =
3371 ((PyStringObject *)tmp)->ob_shash;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00003372 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
Tim Petersaf90b3e2001-09-12 05:18:58 +00003373 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00003374 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00003375 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00003376}
3377
Guido van Rossumcacfc072002-05-24 19:01:59 +00003378static PyObject *
3379basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3380{
3381 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003382 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003383 return NULL;
3384}
3385
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003386static PyObject *
3387string_mod(PyObject *v, PyObject *w)
3388{
3389 if (!PyString_Check(v)) {
3390 Py_INCREF(Py_NotImplemented);
3391 return Py_NotImplemented;
3392 }
3393 return PyString_Format(v, w);
3394}
3395
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003396PyDoc_STRVAR(basestring_doc,
3397"Type basestring cannot be instantiated; it is the base for str and unicode.");
Guido van Rossumcacfc072002-05-24 19:01:59 +00003398
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003399static PyNumberMethods string_as_number = {
3400 0, /*nb_add*/
3401 0, /*nb_subtract*/
3402 0, /*nb_multiply*/
3403 0, /*nb_divide*/
3404 string_mod, /*nb_remainder*/
3405};
3406
3407
Guido van Rossumcacfc072002-05-24 19:01:59 +00003408PyTypeObject PyBaseString_Type = {
3409 PyObject_HEAD_INIT(&PyType_Type)
3410 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00003411 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00003412 0,
3413 0,
3414 0, /* tp_dealloc */
3415 0, /* tp_print */
3416 0, /* tp_getattr */
3417 0, /* tp_setattr */
3418 0, /* tp_compare */
3419 0, /* tp_repr */
3420 0, /* tp_as_number */
3421 0, /* tp_as_sequence */
3422 0, /* tp_as_mapping */
3423 0, /* tp_hash */
3424 0, /* tp_call */
3425 0, /* tp_str */
3426 0, /* tp_getattro */
3427 0, /* tp_setattro */
3428 0, /* tp_as_buffer */
3429 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
3430 basestring_doc, /* tp_doc */
3431 0, /* tp_traverse */
3432 0, /* tp_clear */
3433 0, /* tp_richcompare */
3434 0, /* tp_weaklistoffset */
3435 0, /* tp_iter */
3436 0, /* tp_iternext */
3437 0, /* tp_methods */
3438 0, /* tp_members */
3439 0, /* tp_getset */
3440 &PyBaseObject_Type, /* tp_base */
3441 0, /* tp_dict */
3442 0, /* tp_descr_get */
3443 0, /* tp_descr_set */
3444 0, /* tp_dictoffset */
3445 0, /* tp_init */
3446 0, /* tp_alloc */
3447 basestring_new, /* tp_new */
3448 0, /* tp_free */
3449};
3450
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003451PyDoc_STRVAR(string_doc,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003452"str(object) -> string\n\
3453\n\
3454Return a nice string representation of the object.\n\
Martin v. Löwis14f8b4c2002-06-13 20:33:02 +00003455If the argument is a string, the return value is the same object.");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00003456
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003457PyTypeObject PyString_Type = {
3458 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003459 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00003460 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003461 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003462 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00003463 (destructor)string_dealloc, /* tp_dealloc */
3464 (printfunc)string_print, /* tp_print */
3465 0, /* tp_getattr */
3466 0, /* tp_setattr */
3467 0, /* tp_compare */
3468 (reprfunc)string_repr, /* tp_repr */
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003469 &string_as_number, /* tp_as_number */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003470 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003471 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003472 (hashfunc)string_hash, /* tp_hash */
3473 0, /* tp_call */
3474 (reprfunc)string_str, /* tp_str */
3475 PyObject_GenericGetAttr, /* tp_getattro */
3476 0, /* tp_setattro */
3477 &string_as_buffer, /* tp_as_buffer */
Tim Petersae1d0c92006-03-17 03:29:34 +00003478 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
Neil Schemenauera6cd4e62002-11-18 16:09:38 +00003479 Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003480 string_doc, /* tp_doc */
3481 0, /* tp_traverse */
3482 0, /* tp_clear */
3483 (richcmpfunc)string_richcompare, /* tp_richcompare */
3484 0, /* tp_weaklistoffset */
3485 0, /* tp_iter */
3486 0, /* tp_iternext */
3487 string_methods, /* tp_methods */
3488 0, /* tp_members */
3489 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003490 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003491 0, /* tp_dict */
3492 0, /* tp_descr_get */
3493 0, /* tp_descr_set */
3494 0, /* tp_dictoffset */
3495 0, /* tp_init */
3496 0, /* tp_alloc */
3497 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003498 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003499};
3500
3501void
Fred Drakeba096332000-07-09 07:04:36 +00003502PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003503{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003504 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003505 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003506 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003507 if (w == NULL || !PyString_Check(*pv)) {
3508 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003509 *pv = NULL;
3510 return;
3511 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003512 v = string_concat((PyStringObject *) *pv, w);
3513 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003514 *pv = v;
3515}
3516
Guido van Rossum013142a1994-08-30 08:19:36 +00003517void
Fred Drakeba096332000-07-09 07:04:36 +00003518PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003519{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003520 PyString_Concat(pv, w);
3521 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003522}
3523
3524
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003525/* The following function breaks the notion that strings are immutable:
3526 it changes the size of a string. We get away with this only if there
3527 is only one module referencing the object. You can also think of it
3528 as creating a new string object and destroying the old one, only
3529 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003530 already be known to some other part of the code...
3531 Note that if there's not enough memory to resize the string, the original
3532 string object at *pv is deallocated, *pv is set to NULL, an "out of
3533 memory" exception is set, and -1 is returned. Else (on success) 0 is
3534 returned, and the value in *pv may or may not be the same as on input.
3535 As always, an extra byte is allocated for a trailing \0 byte (newsize
3536 does *not* include that), and a trailing \0 byte is stored.
3537*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003538
3539int
Martin v. Löwis18e16552006-02-15 17:27:45 +00003540_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003541{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003542 register PyObject *v;
3543 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003544 v = *pv;
Armin Rigo618fbf52004-08-07 20:58:32 +00003545 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
3546 PyString_CHECK_INTERNED(v)) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003547 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003548 Py_DECREF(v);
3549 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003550 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003551 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003552 /* XXX UNREF/NEWREF interface should be more symmetrical */
Tim Peters34592512002-07-11 06:23:50 +00003553 _Py_DEC_REFTOTAL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003554 _Py_ForgetReference(v);
3555 *pv = (PyObject *)
Tim Peterse7c05322004-06-27 17:24:49 +00003556 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003557 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003558 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003559 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003560 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003561 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003562 _Py_NewReference(*pv);
3563 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003564 sv->ob_size = newsize;
3565 sv->ob_sval[newsize] = '\0';
Raymond Hettinger561fbf12004-10-26 01:52:37 +00003566 sv->ob_shash = -1; /* invalidate cached hash value */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003567 return 0;
3568}
Guido van Rossume5372401993-03-16 12:15:04 +00003569
3570/* Helpers for formatstring */
3571
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003572static PyObject *
Thomas Wouters977485d2006-02-16 15:59:12 +00003573getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003574{
Thomas Wouters977485d2006-02-16 15:59:12 +00003575 Py_ssize_t argidx = *p_argidx;
Guido van Rossume5372401993-03-16 12:15:04 +00003576 if (argidx < arglen) {
3577 (*p_argidx)++;
3578 if (arglen < 0)
3579 return args;
3580 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003581 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003582 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003583 PyErr_SetString(PyExc_TypeError,
3584 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003585 return NULL;
3586}
3587
/* Flag bits gathered while parsing a format specifier.  Each bit
 * records that the matching flag character was seen:
 *
 *	F_LJUST		'-'
 *	F_SIGN		'+'
 *	F_BLANK		' '
 *	F_ALT		'#'
 *	F_ZERO		'0'
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
3600
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003601static int
Fred Drakeba096332000-07-09 07:04:36 +00003602formatfloat(char *buf, size_t buflen, int flags,
3603 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003604{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003605 /* fmt = '%#.' + `prec` + `type`
3606 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003607 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003608 double x;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003609 x = PyFloat_AsDouble(v);
3610 if (x == -1.0 && PyErr_Occurred()) {
3611 PyErr_SetString(PyExc_TypeError, "float argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003612 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003613 }
Guido van Rossume5372401993-03-16 12:15:04 +00003614 if (prec < 0)
3615 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003616 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3617 type = 'g';
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003618 /* Worst case length calc to ensure no buffer overrun:
3619
3620 'g' formats:
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003621 fmt = %#.<prec>g
3622 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003623 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003624 len = 1 + prec + 1 + 2 + 5 = 9 + prec
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003625
3626 'f' formats:
3627 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
3628 len = 1 + 50 + 1 + prec = 52 + prec
3629
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003630 If prec=0 the effective precision is 1 (the leading digit is
Tim Petersae1d0c92006-03-17 03:29:34 +00003631 always given), therefore increase the length by one.
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003632
3633 */
3634 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
3635 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003636 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003637 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003638 return -1;
3639 }
Marc-André Lemburg79f57832002-12-29 19:44:06 +00003640 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3641 (flags&F_ALT) ? "#" : "",
3642 prec, type);
Martin v. Löwis737ea822004-06-08 18:52:54 +00003643 PyOS_ascii_formatd(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003644 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003645}
3646
Tim Peters38fd5b62000-09-21 05:43:11 +00003647/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3648 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3649 * Python's regular ints.
3650 * Return value: a new PyString*, or NULL if error.
3651 * . *pbuf is set to point into it,
3652 * *plen set to the # of chars following that.
3653 * Caller must decref it when done using pbuf.
3654 * The string starting at *pbuf is of the form
3655 * "-"? ("0x" | "0X")? digit+
3656 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003657 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003658 * There will be at least prec digits, zero-filled on the left if
3659 * necessary to get that many.
3660 * val object to be converted
3661 * flags bitmask of format flags; only F_ALT is looked at
3662 * prec minimum number of digits; 0-fill on left if needed
3663 * type a character in [duoxX]; u acts the same as d
3664 *
3665 * CAUTION: o, x and X conversions on regular ints can never
3666 * produce a '-' sign, but can for Python's unbounded ints.
3667 */
3668PyObject*
3669_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3670 char **pbuf, int *plen)
3671{
3672 PyObject *result = NULL;
3673 char *buf;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003674 Py_ssize_t i;
Tim Peters38fd5b62000-09-21 05:43:11 +00003675 int sign; /* 1 if '-', else 0 */
3676 int len; /* number of characters */
Martin v. Löwis725507b2006-03-07 12:08:51 +00003677 Py_ssize_t llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003678 int numdigits; /* len == numnondigits + numdigits */
3679 int numnondigits = 0;
3680
3681 switch (type) {
3682 case 'd':
3683 case 'u':
3684 result = val->ob_type->tp_str(val);
3685 break;
3686 case 'o':
3687 result = val->ob_type->tp_as_number->nb_oct(val);
3688 break;
3689 case 'x':
3690 case 'X':
3691 numnondigits = 2;
3692 result = val->ob_type->tp_as_number->nb_hex(val);
3693 break;
3694 default:
3695 assert(!"'type' not in [duoxX]");
3696 }
3697 if (!result)
3698 return NULL;
3699
3700 /* To modify the string in-place, there can only be one reference. */
3701 if (result->ob_refcnt != 1) {
3702 PyErr_BadInternalCall();
3703 return NULL;
3704 }
3705 buf = PyString_AsString(result);
Martin v. Löwis725507b2006-03-07 12:08:51 +00003706 llen = PyString_Size(result);
3707 if (llen > INT_MAX) {
3708 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
3709 return NULL;
3710 }
3711 len = (int)llen;
Tim Peters38fd5b62000-09-21 05:43:11 +00003712 if (buf[len-1] == 'L') {
3713 --len;
3714 buf[len] = '\0';
3715 }
3716 sign = buf[0] == '-';
3717 numnondigits += sign;
3718 numdigits = len - numnondigits;
3719 assert(numdigits > 0);
3720
Tim Petersfff53252001-04-12 18:38:48 +00003721 /* Get rid of base marker unless F_ALT */
3722 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003723 /* Need to skip 0x, 0X or 0. */
3724 int skipped = 0;
3725 switch (type) {
3726 case 'o':
3727 assert(buf[sign] == '0');
3728 /* If 0 is only digit, leave it alone. */
3729 if (numdigits > 1) {
3730 skipped = 1;
3731 --numdigits;
3732 }
3733 break;
3734 case 'x':
3735 case 'X':
3736 assert(buf[sign] == '0');
3737 assert(buf[sign + 1] == 'x');
3738 skipped = 2;
3739 numnondigits -= 2;
3740 break;
3741 }
3742 if (skipped) {
3743 buf += skipped;
3744 len -= skipped;
3745 if (sign)
3746 buf[0] = '-';
3747 }
3748 assert(len == numnondigits + numdigits);
3749 assert(numdigits > 0);
3750 }
3751
3752 /* Fill with leading zeroes to meet minimum width. */
3753 if (prec > numdigits) {
3754 PyObject *r1 = PyString_FromStringAndSize(NULL,
3755 numnondigits + prec);
3756 char *b1;
3757 if (!r1) {
3758 Py_DECREF(result);
3759 return NULL;
3760 }
3761 b1 = PyString_AS_STRING(r1);
3762 for (i = 0; i < numnondigits; ++i)
3763 *b1++ = *buf++;
3764 for (i = 0; i < prec - numdigits; i++)
3765 *b1++ = '0';
3766 for (i = 0; i < numdigits; i++)
3767 *b1++ = *buf++;
3768 *b1 = '\0';
3769 Py_DECREF(result);
3770 result = r1;
3771 buf = PyString_AS_STRING(result);
3772 len = numnondigits + prec;
3773 }
3774
3775 /* Fix up case for hex conversions. */
Raymond Hettinger3296e692005-06-29 23:29:56 +00003776 if (type == 'X') {
3777 /* Need to convert all lower case letters to upper case.
3778 and need to convert 0x to 0X (and -0x to -0X). */
Tim Peters38fd5b62000-09-21 05:43:11 +00003779 for (i = 0; i < len; i++)
Raymond Hettinger3296e692005-06-29 23:29:56 +00003780 if (buf[i] >= 'a' && buf[i] <= 'x')
3781 buf[i] -= 'a'-'A';
Tim Peters38fd5b62000-09-21 05:43:11 +00003782 }
3783 *pbuf = buf;
3784 *plen = len;
3785 return result;
3786}
3787
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003788static int
Fred Drakeba096332000-07-09 07:04:36 +00003789formatint(char *buf, size_t buflen, int flags,
3790 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003791{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003792 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003793 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3794 + 1 + 1 = 24 */
3795 char fmt[64]; /* plenty big enough! */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003796 char *sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003797 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003798
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003799 x = PyInt_AsLong(v);
3800 if (x == -1 && PyErr_Occurred()) {
3801 PyErr_SetString(PyExc_TypeError, "int argument required");
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003802 return -1;
Neal Norwitz88fe4ff2002-07-28 16:44:23 +00003803 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003804 if (x < 0 && type == 'u') {
3805 type = 'd';
Guido van Rossum078151d2002-08-11 04:24:12 +00003806 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003807 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
3808 sign = "-";
3809 else
3810 sign = "";
Guido van Rossume5372401993-03-16 12:15:04 +00003811 if (prec < 0)
3812 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003813
3814 if ((flags & F_ALT) &&
3815 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003816 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003817 * of issues that cause pain:
3818 * - when 0 is being converted, the C standard leaves off
3819 * the '0x' or '0X', which is inconsistent with other
3820 * %#x/%#X conversions and inconsistent with Python's
3821 * hex() function
3822 * - there are platforms that violate the standard and
3823 * convert 0 with the '0x' or '0X'
3824 * (Metrowerks, Compaq Tru64)
3825 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003826 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003827 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003828 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003829 * We can achieve the desired consistency by inserting our
3830 * own '0x' or '0X' prefix, and substituting %x/%X in place
3831 * of %#x/%#X.
3832 *
3833 * Note that this is the same approach as used in
3834 * formatint() in unicodeobject.c
3835 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003836 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
3837 sign, type, prec, type);
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003838 }
3839 else {
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003840 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
3841 sign, (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003842 prec, type);
3843 }
3844
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003845 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
3846 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003847 */
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003848 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003849 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003850 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003851 return -1;
3852 }
Guido van Rossum6c9e1302003-11-29 23:52:13 +00003853 if (sign[0])
3854 PyOS_snprintf(buf, buflen, fmt, -x);
3855 else
3856 PyOS_snprintf(buf, buflen, fmt, x);
Martin v. Löwis18e16552006-02-15 17:27:45 +00003857 return (int)strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003858}
3859
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003860static int
Fred Drakeba096332000-07-09 07:04:36 +00003861formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003862{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003863 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003864 if (PyString_Check(v)) {
3865 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003866 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003867 }
3868 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003869 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003870 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003871 }
3872 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003873 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003874}
3875
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand wherever it is used.
*/
#define FORMATBUFLEN	((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003885
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003886PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003887PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003888{
3889 char *fmt, *res;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00003890 Py_ssize_t arglen, argidx;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003891 Py_ssize_t reslen, rescnt, fmtcnt;
Guido van Rossum993952b1996-05-21 22:44:20 +00003892 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003893 PyObject *result, *orig_args;
3894#ifdef Py_USING_UNICODE
3895 PyObject *v, *w;
3896#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003897 PyObject *dict = NULL;
3898 if (format == NULL || !PyString_Check(format) || args == NULL) {
3899 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003900 return NULL;
3901 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003902 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003903 fmt = PyString_AS_STRING(format);
3904 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003905 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003906 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003907 if (result == NULL)
3908 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003909 res = PyString_AsString(result);
3910 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003911 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003912 argidx = 0;
3913 }
3914 else {
3915 arglen = -1;
3916 argidx = -2;
3917 }
Neal Norwitz80a1bf42002-11-12 23:01:12 +00003918 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
3919 !PyObject_TypeCheck(args, &PyBaseString_Type))
Guido van Rossum013142a1994-08-30 08:19:36 +00003920 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003921 while (--fmtcnt >= 0) {
3922 if (*fmt != '%') {
3923 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003924 rescnt = fmtcnt + 100;
3925 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003926 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003927 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003928 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003929 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003930 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003931 }
3932 *res++ = *fmt++;
3933 }
3934 else {
3935 /* Got a format specifier */
3936 int flags = 0;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003937 Py_ssize_t width = -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003938 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003939 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003940 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003941 PyObject *v = NULL;
3942 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003943 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003944 int sign;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003945 Py_ssize_t len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003946 char formatbuf[FORMATBUFLEN];
3947 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003948#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003949 char *fmt_start = fmt;
Martin v. Löwis725507b2006-03-07 12:08:51 +00003950 Py_ssize_t argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003951#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003952
Guido van Rossumda9c2711996-12-05 21:58:58 +00003953 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003954 if (*fmt == '(') {
3955 char *keystart;
Martin v. Löwis18e16552006-02-15 17:27:45 +00003956 Py_ssize_t keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003957 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003958 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003959
3960 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003961 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003962 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003963 goto error;
3964 }
3965 ++fmt;
3966 --fmtcnt;
3967 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003968 /* Skip over balanced parentheses */
3969 while (pcount > 0 && --fmtcnt >= 0) {
3970 if (*fmt == ')')
3971 --pcount;
3972 else if (*fmt == '(')
3973 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003974 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003975 }
3976 keylen = fmt - keystart - 1;
3977 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003978 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003979 "incomplete format key");
3980 goto error;
3981 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003982 key = PyString_FromStringAndSize(keystart,
3983 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003984 if (key == NULL)
3985 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003986 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003987 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003988 args_owned = 0;
3989 }
3990 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003991 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003992 if (args == NULL) {
3993 goto error;
3994 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003995 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003996 arglen = -1;
3997 argidx = -2;
3998 }
Guido van Rossume5372401993-03-16 12:15:04 +00003999 while (--fmtcnt >= 0) {
4000 switch (c = *fmt++) {
4001 case '-': flags |= F_LJUST; continue;
4002 case '+': flags |= F_SIGN; continue;
4003 case ' ': flags |= F_BLANK; continue;
4004 case '#': flags |= F_ALT; continue;
4005 case '0': flags |= F_ZERO; continue;
4006 }
4007 break;
4008 }
4009 if (c == '*') {
4010 v = getnextarg(args, arglen, &argidx);
4011 if (v == NULL)
4012 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004013 if (!PyInt_Check(v)) {
4014 PyErr_SetString(PyExc_TypeError,
4015 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004016 goto error;
4017 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004018 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00004019 if (width < 0) {
4020 flags |= F_LJUST;
4021 width = -width;
4022 }
Guido van Rossume5372401993-03-16 12:15:04 +00004023 if (--fmtcnt >= 0)
4024 c = *fmt++;
4025 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004026 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004027 width = c - '0';
4028 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004029 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004030 if (!isdigit(c))
4031 break;
4032 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004033 PyErr_SetString(
4034 PyExc_ValueError,
4035 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00004036 goto error;
4037 }
4038 width = width*10 + (c - '0');
4039 }
4040 }
4041 if (c == '.') {
4042 prec = 0;
4043 if (--fmtcnt >= 0)
4044 c = *fmt++;
4045 if (c == '*') {
4046 v = getnextarg(args, arglen, &argidx);
4047 if (v == NULL)
4048 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004049 if (!PyInt_Check(v)) {
4050 PyErr_SetString(
4051 PyExc_TypeError,
4052 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00004053 goto error;
4054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004055 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00004056 if (prec < 0)
4057 prec = 0;
4058 if (--fmtcnt >= 0)
4059 c = *fmt++;
4060 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004061 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004062 prec = c - '0';
4063 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004064 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00004065 if (!isdigit(c))
4066 break;
4067 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004068 PyErr_SetString(
4069 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00004070 "prec too big");
4071 goto error;
4072 }
4073 prec = prec*10 + (c - '0');
4074 }
4075 }
4076 } /* prec */
4077 if (fmtcnt >= 0) {
4078 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00004079 if (--fmtcnt >= 0)
4080 c = *fmt++;
4081 }
4082 }
4083 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004084 PyErr_SetString(PyExc_ValueError,
4085 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00004086 goto error;
4087 }
4088 if (c != '%') {
4089 v = getnextarg(args, arglen, &argidx);
4090 if (v == NULL)
4091 goto error;
4092 }
4093 sign = 0;
4094 fill = ' ';
4095 switch (c) {
4096 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004097 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00004098 len = 1;
4099 break;
4100 case 's':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004101#ifdef Py_USING_UNICODE
Neil Schemenauerab619232005-08-31 23:02:05 +00004102 if (PyUnicode_Check(v)) {
4103 fmt = fmt_start;
4104 argidx = argidx_start;
4105 goto unicode;
4106 }
Georg Brandld45014b2005-10-01 17:06:00 +00004107#endif
Neil Schemenauercf52c072005-08-12 17:34:58 +00004108 temp = _PyObject_Str(v);
Georg Brandld45014b2005-10-01 17:06:00 +00004109#ifdef Py_USING_UNICODE
Neil Schemenauercf52c072005-08-12 17:34:58 +00004110 if (temp != NULL && PyUnicode_Check(temp)) {
4111 Py_DECREF(temp);
Guido van Rossum90daa872000-04-10 13:47:21 +00004112 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004113 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00004114 goto unicode;
4115 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004116#endif
Guido van Rossumb00c07f2002-10-09 19:07:53 +00004117 /* Fall through */
Walter Dörwald9ff3f032003-06-18 14:17:01 +00004118 case 'r':
Neil Schemenauercf52c072005-08-12 17:34:58 +00004119 if (c == 'r')
Guido van Rossumf0b7b042000-04-11 15:39:26 +00004120 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00004121 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00004122 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004123 if (!PyString_Check(temp)) {
4124 PyErr_SetString(PyExc_TypeError,
Guido van Rossum8052f892002-10-09 19:14:30 +00004125 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00004126 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00004127 goto error;
4128 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00004129 pbuf = PyString_AS_STRING(temp);
4130 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004131 if (prec >= 0 && len > prec)
4132 len = prec;
4133 break;
4134 case 'i':
4135 case 'd':
4136 case 'u':
4137 case 'o':
4138 case 'x':
4139 case 'X':
4140 if (c == 'i')
4141 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00004142 if (PyLong_Check(v)) {
Martin v. Löwis725507b2006-03-07 12:08:51 +00004143 int ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004144 temp = _PyString_FormatLong(v, flags,
Martin v. Löwis725507b2006-03-07 12:08:51 +00004145 prec, c, &pbuf, &ilen);
4146 len = ilen;
Tim Peters38fd5b62000-09-21 05:43:11 +00004147 if (!temp)
4148 goto error;
Tim Peters38fd5b62000-09-21 05:43:11 +00004149 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00004150 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004151 else {
4152 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004153 len = formatint(pbuf,
4154 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00004155 flags, prec, c, v);
4156 if (len < 0)
4157 goto error;
Guido van Rossum6c9e1302003-11-29 23:52:13 +00004158 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004159 }
4160 if (flags & F_ZERO)
4161 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00004162 break;
4163 case 'e':
4164 case 'E':
4165 case 'f':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004166 case 'F':
Guido van Rossume5372401993-03-16 12:15:04 +00004167 case 'g':
4168 case 'G':
Raymond Hettinger9bfe5332003-08-27 04:55:52 +00004169 if (c == 'F')
4170 c = 'f';
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004171 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00004172 len = formatfloat(pbuf, sizeof(formatbuf),
4173 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004174 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004175 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004176 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00004177 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00004178 fill = '0';
4179 break;
4180 case 'c':
Walter Dörwald43440a62003-03-31 18:07:50 +00004181#ifdef Py_USING_UNICODE
4182 if (PyUnicode_Check(v)) {
4183 fmt = fmt_start;
4184 argidx = argidx_start;
4185 goto unicode;
4186 }
4187#endif
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004188 pbuf = formatbuf;
4189 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00004190 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004191 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00004192 break;
4193 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00004194 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00004195 "unsupported format character '%c' (0x%x) "
4196 "at index %i",
Guido van Rossumefc11882002-09-12 14:43:41 +00004197 c, c,
4198 (int)(fmt - 1 - PyString_AsString(format)));
Guido van Rossume5372401993-03-16 12:15:04 +00004199 goto error;
4200 }
4201 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004202 if (*pbuf == '-' || *pbuf == '+') {
4203 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00004204 len--;
4205 }
4206 else if (flags & F_SIGN)
4207 sign = '+';
4208 else if (flags & F_BLANK)
4209 sign = ' ';
4210 else
Tim Peters38fd5b62000-09-21 05:43:11 +00004211 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00004212 }
4213 if (width < len)
4214 width = len;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004215 if (rescnt - (sign != 0) < width) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00004216 reslen -= rescnt;
4217 rescnt = width + fmtcnt + 100;
4218 reslen += rescnt;
Guido van Rossum049cd6b2002-10-11 00:43:48 +00004219 if (reslen < 0) {
4220 Py_DECREF(result);
4221 return PyErr_NoMemory();
4222 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004223 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00004224 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00004225 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004226 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00004227 }
4228 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00004229 if (fill != ' ')
4230 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00004231 rescnt--;
4232 if (width > len)
4233 width--;
4234 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004235 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4236 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00004237 assert(pbuf[1] == c);
4238 if (fill != ' ') {
4239 *res++ = *pbuf++;
4240 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00004241 }
Tim Petersfff53252001-04-12 18:38:48 +00004242 rescnt -= 2;
4243 width -= 2;
4244 if (width < 0)
4245 width = 0;
4246 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00004247 }
4248 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00004249 do {
4250 --rescnt;
4251 *res++ = fill;
4252 } while (--width > len);
4253 }
Tim Peters38fd5b62000-09-21 05:43:11 +00004254 if (fill == ' ') {
4255 if (sign)
4256 *res++ = sign;
4257 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00004258 (c == 'x' || c == 'X')) {
4259 assert(pbuf[0] == '0');
4260 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00004261 *res++ = *pbuf++;
4262 *res++ = *pbuf++;
4263 }
4264 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00004265 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00004266 res += len;
4267 rescnt -= len;
4268 while (--width >= len) {
4269 --rescnt;
4270 *res++ = ' ';
4271 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00004272 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004273 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004274 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00004275 goto error;
4276 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004277 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00004278 } /* '%' */
4279 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00004280 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004281 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00004282 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00004283 goto error;
4284 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004285 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004286 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004287 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004288 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00004289 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00004290
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004291#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00004292 unicode:
4293 if (args_owned) {
4294 Py_DECREF(args);
4295 args_owned = 0;
4296 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00004297 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00004298 if (PyTuple_Check(orig_args) && argidx > 0) {
4299 PyObject *v;
Martin v. Löwiseb079f12006-02-16 14:32:27 +00004300 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
Guido van Rossum90daa872000-04-10 13:47:21 +00004301 v = PyTuple_New(n);
4302 if (v == NULL)
4303 goto error;
4304 while (--n >= 0) {
4305 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4306 Py_INCREF(w);
4307 PyTuple_SET_ITEM(v, n, w);
4308 }
4309 args = v;
4310 } else {
4311 Py_INCREF(orig_args);
4312 args = orig_args;
4313 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004314 args_owned = 1;
4315 /* Take what we have of the result and let the Unicode formatting
4316 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00004317 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004318 if (_PyString_Resize(&result, rescnt))
4319 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00004320 fmtcnt = PyString_GET_SIZE(format) - \
4321 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004322 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4323 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00004324 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004325 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00004326 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004327 if (v == NULL)
4328 goto error;
4329 /* Paste what we have (result) to what the Unicode formatting
4330 function returned (v) and return the result (or error) */
4331 w = PyUnicode_Concat(result, v);
4332 Py_DECREF(result);
4333 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00004334 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00004335 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00004336#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00004337
Guido van Rossume5372401993-03-16 12:15:04 +00004338 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004339 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004340 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004341 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00004342 }
Guido van Rossume5372401993-03-16 12:15:04 +00004343 return NULL;
4344}
Guido van Rossum2a61e741997-01-18 07:55:05 +00004345
Guido van Rossum2a61e741997-01-18 07:55:05 +00004346void
Fred Drakeba096332000-07-09 07:04:36 +00004347PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004348{
4349 register PyStringObject *s = (PyStringObject *)(*p);
4350 PyObject *t;
4351 if (s == NULL || !PyString_Check(s))
4352 Py_FatalError("PyString_InternInPlace: strings only please!");
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004353 /* If it's a string subclass, we don't really know what putting
4354 it in the interned dict might do. */
4355 if (!PyString_CheckExact(s))
4356 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004357 if (PyString_CHECK_INTERNED(s))
Guido van Rossum2a61e741997-01-18 07:55:05 +00004358 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004359 if (interned == NULL) {
4360 interned = PyDict_New();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004361 if (interned == NULL) {
4362 PyErr_Clear(); /* Don't leave an exception */
Guido van Rossum2a61e741997-01-18 07:55:05 +00004363 return;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004364 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00004365 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004366 t = PyDict_GetItem(interned, (PyObject *)s);
4367 if (t) {
Guido van Rossum2a61e741997-01-18 07:55:05 +00004368 Py_INCREF(t);
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004369 Py_DECREF(*p);
4370 *p = t;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004371 return;
4372 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004373
Armin Rigo79f7ad22004-08-07 19:27:39 +00004374 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004375 PyErr_Clear();
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004376 return;
4377 }
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004378 /* The two references in interned are not counted by refcnt.
4379 The string deallocator will take care of this */
Armin Rigo79f7ad22004-08-07 19:27:39 +00004380 s->ob_refcnt -= 2;
Jeremy Hylton4c989dd2004-08-07 19:20:05 +00004381 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
Guido van Rossum2a61e741997-01-18 07:55:05 +00004382}
4383
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004384void
4385PyString_InternImmortal(PyObject **p)
4386{
4387 PyString_InternInPlace(p);
4388 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4389 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4390 Py_INCREF(*p);
4391 }
4392}
4393
Guido van Rossum2a61e741997-01-18 07:55:05 +00004394
4395PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00004396PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00004397{
4398 PyObject *s = PyString_FromString(cp);
4399 if (s == NULL)
4400 return NULL;
4401 PyString_InternInPlace(&s);
4402 return s;
4403}
4404
Guido van Rossum8cf04761997-08-02 02:57:45 +00004405void
Fred Drakeba096332000-07-09 07:04:36 +00004406PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00004407{
4408 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004409 for (i = 0; i < UCHAR_MAX + 1; i++) {
4410 Py_XDECREF(characters[i]);
4411 characters[i] = NULL;
4412 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00004413 Py_XDECREF(nullstring);
4414 nullstring = NULL;
Guido van Rossum8cf04761997-08-02 02:57:45 +00004415}
Barry Warsawa903ad982001-02-23 16:40:48 +00004416
Barry Warsawa903ad982001-02-23 16:40:48 +00004417void _Py_ReleaseInternedStrings(void)
4418{
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004419 PyObject *keys;
4420 PyStringObject *s;
Martin v. Löwis18e16552006-02-15 17:27:45 +00004421 Py_ssize_t i, n;
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004422
4423 if (interned == NULL || !PyDict_Check(interned))
4424 return;
4425 keys = PyDict_Keys(interned);
4426 if (keys == NULL || !PyList_Check(keys)) {
4427 PyErr_Clear();
4428 return;
Barry Warsawa903ad982001-02-23 16:40:48 +00004429 }
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004430
4431 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4432 detector, interned strings are not forcibly deallocated; rather, we
4433 give them their stolen references back, and then clear and DECREF
4434 the interned dict. */
Tim Petersae1d0c92006-03-17 03:29:34 +00004435
Guido van Rossum45ec02a2002-08-19 21:43:18 +00004436 fprintf(stderr, "releasing interned strings\n");
4437 n = PyList_GET_SIZE(keys);
4438 for (i = 0; i < n; i++) {
4439 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4440 switch (s->ob_sstate) {
4441 case SSTATE_NOT_INTERNED:
4442 /* XXX Shouldn't happen */
4443 break;
4444 case SSTATE_INTERNED_IMMORTAL:
4445 s->ob_refcnt += 1;
4446 break;
4447 case SSTATE_INTERNED_MORTAL:
4448 s->ob_refcnt += 2;
4449 break;
4450 default:
4451 Py_FatalError("Inconsistent interned string state.");
4452 }
4453 s->ob_sstate = SSTATE_NOT_INTERNED;
4454 }
4455 Py_DECREF(keys);
4456 PyDict_Clear(interned);
4457 Py_DECREF(interned);
4458 interned = NULL;
Barry Warsawa903ad982001-02-23 16:40:48 +00004459}