blob: 4c285009628ec2257bef4f474c6604a092f207ac [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() (formerly
   newsizedstringobject() and newstringobject()) try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was gotten from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Barry Warsawdadace02001-08-24 18:32:06 +0000150PyObject *
151PyString_FromFormatV(const char *format, va_list vargs)
152{
153 va_list count = vargs;
154 int n = 0;
155 const char* f;
156 char *s;
157 PyObject* string;
158
159 /* step 1: figure out how large a buffer we need */
160 for (f = format; *f; f++) {
161 if (*f == '%') {
162 const char* p = f;
163 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
164 ;
165
166 /* skip the 'l' in %ld, since it doesn't change the
167 width. although only %d is supported (see
168 "expand" section below), others can be easily
169 add */
170 if (*f == 'l' && *(f+1) == 'd')
171 ++f;
172
173 switch (*f) {
174 case 'c':
175 (void)va_arg(count, int);
176 /* fall through... */
177 case '%':
178 n++;
179 break;
180 case 'd': case 'i': case 'x':
181 (void) va_arg(count, int);
182 /* 20 bytes should be enough to hold a 64-bit
183 integer */
184 n += 20;
185 break;
186 case 's':
187 s = va_arg(count, char*);
188 n += strlen(s);
189 break;
190 case 'p':
191 (void) va_arg(count, int);
192 /* maximum 64-bit pointer representation:
193 * 0xffffffffffffffff
194 * so 19 characters is enough.
195 */
196 n += 19;
197 break;
198 default:
199 /* if we stumble upon an unknown
200 formatting code, copy the rest of
201 the format string to the output
202 string. (we cannot just skip the
203 code, since there's no way to know
204 what's in the argument list) */
205 n += strlen(p);
206 goto expand;
207 }
208 } else
209 n++;
210 }
211 expand:
212 /* step 2: fill the buffer */
213 string = PyString_FromStringAndSize(NULL, n);
214 if (!string)
215 return NULL;
216
217 s = PyString_AsString(string);
218
219 for (f = format; *f; f++) {
220 if (*f == '%') {
221 const char* p = f++;
222 int i, longflag = 0;
223 /* parse the width.precision part (we're only
224 interested in the precision value, if any) */
225 n = 0;
226 while (isdigit(Py_CHARMASK(*f)))
227 n = (n*10) + *f++ - '0';
228 if (*f == '.') {
229 f++;
230 n = 0;
231 while (isdigit(Py_CHARMASK(*f)))
232 n = (n*10) + *f++ - '0';
233 }
234 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
235 f++;
236 /* handle the long flag, but only for %ld. others
237 can be added when necessary. */
238 if (*f == 'l' && *(f+1) == 'd') {
239 longflag = 1;
240 ++f;
241 }
242
243 switch (*f) {
244 case 'c':
245 *s++ = va_arg(vargs, int);
246 break;
247 case 'd':
248 if (longflag)
249 sprintf(s, "%ld", va_arg(vargs, long));
250 else
251 sprintf(s, "%d", va_arg(vargs, int));
252 s += strlen(s);
253 break;
254 case 'i':
255 sprintf(s, "%i", va_arg(vargs, int));
256 s += strlen(s);
257 break;
258 case 'x':
259 sprintf(s, "%x", va_arg(vargs, int));
260 s += strlen(s);
261 break;
262 case 's':
263 p = va_arg(vargs, char*);
264 i = strlen(p);
265 if (n > 0 && i > n)
266 i = n;
267 memcpy(s, p, i);
268 s += i;
269 break;
270 case 'p':
271 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000272 /* %p is ill-defined: ensure leading 0x. */
273 if (s[1] == 'X')
274 s[1] = 'x';
275 else if (s[1] != 'x') {
276 memmove(s+2, s, strlen(s)+1);
277 s[0] = '0';
278 s[1] = 'x';
279 }
Barry Warsawdadace02001-08-24 18:32:06 +0000280 s += strlen(s);
281 break;
282 case '%':
283 *s++ = '%';
284 break;
285 default:
286 strcpy(s, p);
287 s += strlen(s);
288 goto end;
289 }
290 } else
291 *s++ = *f;
292 }
293
294 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000295 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000296 return string;
297}
298
299PyObject *
300PyString_FromFormat(const char *format, ...)
301{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000302 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000303 va_list vargs;
304
305#ifdef HAVE_STDARG_PROTOTYPES
306 va_start(vargs, format);
307#else
308 va_start(vargs);
309#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000310 ret = PyString_FromFormatV(format, vargs);
311 va_end(vargs);
312 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000313}
314
315
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000316PyObject *PyString_Decode(const char *s,
317 int size,
318 const char *encoding,
319 const char *errors)
320{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000321 PyObject *v, *str;
322
323 str = PyString_FromStringAndSize(s, size);
324 if (str == NULL)
325 return NULL;
326 v = PyString_AsDecodedString(str, encoding, errors);
327 Py_DECREF(str);
328 return v;
329}
330
331PyObject *PyString_AsDecodedObject(PyObject *str,
332 const char *encoding,
333 const char *errors)
334{
335 PyObject *v;
336
337 if (!PyString_Check(str)) {
338 PyErr_BadArgument();
339 goto onError;
340 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000341
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000342 if (encoding == NULL) {
343#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000344 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000345#else
346 PyErr_SetString(PyExc_ValueError, "no encoding specified");
347 goto onError;
348#endif
349 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350
351 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000352 v = PyCodec_Decode(str, encoding, errors);
353 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000354 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000355
356 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000357
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000358 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000359 return NULL;
360}
361
362PyObject *PyString_AsDecodedString(PyObject *str,
363 const char *encoding,
364 const char *errors)
365{
366 PyObject *v;
367
368 v = PyString_AsDecodedObject(str, encoding, errors);
369 if (v == NULL)
370 goto onError;
371
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000372#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000373 /* Convert Unicode to a string using the default encoding */
374 if (PyUnicode_Check(v)) {
375 PyObject *temp = v;
376 v = PyUnicode_AsEncodedString(v, NULL, NULL);
377 Py_DECREF(temp);
378 if (v == NULL)
379 goto onError;
380 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000381#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000382 if (!PyString_Check(v)) {
383 PyErr_Format(PyExc_TypeError,
384 "decoder did not return a string object (type=%.400s)",
385 v->ob_type->tp_name);
386 Py_DECREF(v);
387 goto onError;
388 }
389
390 return v;
391
392 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000393 return NULL;
394}
395
396PyObject *PyString_Encode(const char *s,
397 int size,
398 const char *encoding,
399 const char *errors)
400{
401 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000402
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000403 str = PyString_FromStringAndSize(s, size);
404 if (str == NULL)
405 return NULL;
406 v = PyString_AsEncodedString(str, encoding, errors);
407 Py_DECREF(str);
408 return v;
409}
410
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000411PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000412 const char *encoding,
413 const char *errors)
414{
415 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000416
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000417 if (!PyString_Check(str)) {
418 PyErr_BadArgument();
419 goto onError;
420 }
421
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000422 if (encoding == NULL) {
423#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000424 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000425#else
426 PyErr_SetString(PyExc_ValueError, "no encoding specified");
427 goto onError;
428#endif
429 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430
431 /* Encode via the codec registry */
432 v = PyCodec_Encode(str, encoding, errors);
433 if (v == NULL)
434 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000435
436 return v;
437
438 onError:
439 return NULL;
440}
441
442PyObject *PyString_AsEncodedString(PyObject *str,
443 const char *encoding,
444 const char *errors)
445{
446 PyObject *v;
447
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000448 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000449 if (v == NULL)
450 goto onError;
451
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000452#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000453 /* Convert Unicode to a string using the default encoding */
454 if (PyUnicode_Check(v)) {
455 PyObject *temp = v;
456 v = PyUnicode_AsEncodedString(v, NULL, NULL);
457 Py_DECREF(temp);
458 if (v == NULL)
459 goto onError;
460 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000461#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000462 if (!PyString_Check(v)) {
463 PyErr_Format(PyExc_TypeError,
464 "encoder did not return a string object (type=%.400s)",
465 v->ob_type->tp_name);
466 Py_DECREF(v);
467 goto onError;
468 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000469
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000470 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000471
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000472 onError:
473 return NULL;
474}
475
Guido van Rossum234f9421993-06-17 12:35:49 +0000476static void
Fred Drakeba096332000-07-09 07:04:36 +0000477string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000478{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000479 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000480}
481
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000482static int
483string_getsize(register PyObject *op)
484{
485 char *s;
486 int len;
487 if (PyString_AsStringAndSize(op, &s, &len))
488 return -1;
489 return len;
490}
491
492static /*const*/ char *
493string_getbuffer(register PyObject *op)
494{
495 char *s;
496 int len;
497 if (PyString_AsStringAndSize(op, &s, &len))
498 return NULL;
499 return s;
500}
501
Guido van Rossumd7047b31995-01-02 19:07:15 +0000502int
Fred Drakeba096332000-07-09 07:04:36 +0000503PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000505 if (!PyString_Check(op))
506 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000508}
509
510/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000511PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000512{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000513 if (!PyString_Check(op))
514 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000515 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516}
517
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000518int
519PyString_AsStringAndSize(register PyObject *obj,
520 register char **s,
521 register int *len)
522{
523 if (s == NULL) {
524 PyErr_BadInternalCall();
525 return -1;
526 }
527
528 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000529#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000530 if (PyUnicode_Check(obj)) {
531 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
532 if (obj == NULL)
533 return -1;
534 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535 else
536#endif
537 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000538 PyErr_Format(PyExc_TypeError,
539 "expected string or Unicode object, "
540 "%.200s found", obj->ob_type->tp_name);
541 return -1;
542 }
543 }
544
545 *s = PyString_AS_STRING(obj);
546 if (len != NULL)
547 *len = PyString_GET_SIZE(obj);
548 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
549 PyErr_SetString(PyExc_TypeError,
550 "expected string without null bytes");
551 return -1;
552 }
553 return 0;
554}
555
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556/* Methods */
557
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000558static int
Fred Drakeba096332000-07-09 07:04:36 +0000559string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000560{
561 int i;
562 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000563 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000567 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569
Thomas Wouters7e474022000-07-16 12:04:32 +0000570 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000571 quote = '\'';
572 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
573 quote = '"';
574
575 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000576 for (i = 0; i < op->ob_size; i++) {
577 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000578 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000579 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000580 else if (c == '\t')
581 fprintf(fp, "\\t");
582 else if (c == '\n')
583 fprintf(fp, "\\n");
584 else if (c == '\r')
585 fprintf(fp, "\\r");
586 else if (c < ' ' || c >= 0x7f)
587 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000588 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000589 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000590 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000592 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000593}
594
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000595static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000596string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000598 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
599 PyObject *v;
600 if (newsize > INT_MAX) {
601 PyErr_SetString(PyExc_OverflowError,
602 "string is too large to make repr");
603 }
604 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000605 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000606 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000607 }
608 else {
609 register int i;
610 register char c;
611 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000612 int quote;
613
Thomas Wouters7e474022000-07-16 12:04:32 +0000614 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000615 quote = '\'';
616 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
617 quote = '"';
618
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000619 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000620 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000621 for (i = 0; i < op->ob_size; i++) {
622 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000623 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000624 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000625 else if (c == '\t')
626 *p++ = '\\', *p++ = 't';
627 else if (c == '\n')
628 *p++ = '\\', *p++ = 'n';
629 else if (c == '\r')
630 *p++ = '\\', *p++ = 'r';
631 else if (c < ' ' || c >= 0x7f) {
632 sprintf(p, "\\x%02x", c & 0xff);
633 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 }
635 else
636 *p++ = c;
637 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000638 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000639 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000640 _PyString_Resize(
641 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000642 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000643 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644}
645
Guido van Rossum189f1df2001-05-01 16:51:53 +0000646static PyObject *
647string_str(PyObject *s)
648{
649 Py_INCREF(s);
650 return s;
651}
652
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000653static int
Fred Drakeba096332000-07-09 07:04:36 +0000654string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000655{
656 return a->ob_size;
657}
658
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000659static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000660string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000661{
662 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000663 register PyStringObject *op;
664 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000665#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000666 if (PyUnicode_Check(bb))
667 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000668#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000669 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000670 "cannot add type \"%.200s\" to string",
671 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000672 return NULL;
673 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000674#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000675 /* Optimize cases with empty left or right operand */
676 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000677 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000678 return bb;
679 }
680 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000681 Py_INCREF(a);
682 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000683 }
684 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000685 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000686 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000687 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000688 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000689 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000690 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000691#ifdef CACHE_HASH
692 op->ob_shash = -1;
693#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000694#ifdef INTERN_STRINGS
695 op->ob_sinterned = NULL;
696#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000697 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
698 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
699 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000700 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701#undef b
702}
703
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000704static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000705string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706{
707 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000708 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000709 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000710 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000711 if (n < 0)
712 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000713 /* watch out for overflows: the size can overflow int,
714 * and the # of bytes needed can overflow size_t
715 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000716 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000717 if (n && size / n != a->ob_size) {
718 PyErr_SetString(PyExc_OverflowError,
719 "repeated string is too long");
720 return NULL;
721 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000722 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000723 Py_INCREF(a);
724 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000725 }
Tim Peters8f422462000-09-09 06:13:41 +0000726 nbytes = size * sizeof(char);
727 if (nbytes / sizeof(char) != (size_t)size ||
728 nbytes + sizeof(PyStringObject) <= nbytes) {
729 PyErr_SetString(PyExc_OverflowError,
730 "repeated string is too long");
731 return NULL;
732 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000733 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000734 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000735 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000736 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000737 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000738#ifdef CACHE_HASH
739 op->ob_shash = -1;
740#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000741#ifdef INTERN_STRINGS
742 op->ob_sinterned = NULL;
743#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000744 for (i = 0; i < size; i += a->ob_size)
745 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
746 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000747 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000748}
749
750/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
751
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000752static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000753string_slice(register PyStringObject *a, register int i, register int j)
754 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000755{
756 if (i < 0)
757 i = 0;
758 if (j < 0)
759 j = 0; /* Avoid signed/unsigned bug in next line */
760 if (j > a->ob_size)
761 j = a->ob_size;
762 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000763 Py_INCREF(a);
764 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000765 }
766 if (j < i)
767 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000768 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000769}
770
Guido van Rossum9284a572000-03-07 15:53:43 +0000771static int
Fred Drakeba096332000-07-09 07:04:36 +0000772string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000773{
774 register char *s, *end;
775 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000776#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000777 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000778 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000779#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000780 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000781 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000782 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000783 return -1;
784 }
785 c = PyString_AsString(el)[0];
786 s = PyString_AsString(a);
787 end = s + PyString_Size(a);
788 while (s < end) {
789 if (c == *s++)
790 return 1;
791 }
792 return 0;
793}
794
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000795static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000796string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000798 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000799 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000800 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000801 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000802 return NULL;
803 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000804 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000805 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000806 if (v == NULL)
807 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000808 else {
809#ifdef COUNT_ALLOCS
810 one_strings++;
811#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000812 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000813 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000814 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000815}
816
Martin v. Löwiscd353062001-05-24 16:56:35 +0000817static PyObject*
818string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000819{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000820 int c;
821 int len_a, len_b;
822 int min_len;
823 PyObject *result;
824
825 /* One of the objects is a string object. Make sure the
826 other one is one, too. */
827 if (a->ob_type != b->ob_type) {
828 result = Py_NotImplemented;
829 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000830 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000831 if (a == b) {
832 switch (op) {
833 case Py_EQ:case Py_LE:case Py_GE:
834 result = Py_True;
835 goto out;
836 case Py_NE:case Py_LT:case Py_GT:
837 result = Py_False;
838 goto out;
839 }
840 }
841 if (op == Py_EQ) {
842 /* Supporting Py_NE here as well does not save
843 much time, since Py_NE is rarely used. */
844 if (a->ob_size == b->ob_size
845 && (a->ob_sval[0] == b->ob_sval[0]
846 && memcmp(a->ob_sval, b->ob_sval,
847 a->ob_size) == 0)) {
848 result = Py_True;
849 } else {
850 result = Py_False;
851 }
852 goto out;
853 }
854 len_a = a->ob_size; len_b = b->ob_size;
855 min_len = (len_a < len_b) ? len_a : len_b;
856 if (min_len > 0) {
857 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
858 if (c==0)
859 c = memcmp(a->ob_sval, b->ob_sval, min_len);
860 }else
861 c = 0;
862 if (c == 0)
863 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
864 switch (op) {
865 case Py_LT: c = c < 0; break;
866 case Py_LE: c = c <= 0; break;
867 case Py_EQ: assert(0); break; /* unreachable */
868 case Py_NE: c = c != 0; break;
869 case Py_GT: c = c > 0; break;
870 case Py_GE: c = c >= 0; break;
871 default:
872 result = Py_NotImplemented;
873 goto out;
874 }
875 result = c ? Py_True : Py_False;
876 out:
877 Py_INCREF(result);
878 return result;
879}
880
881int
882_PyString_Eq(PyObject *o1, PyObject *o2)
883{
884 PyStringObject *a, *b;
885 a = (PyStringObject*)o1;
886 b = (PyStringObject*)o2;
887 return a->ob_size == b->ob_size
888 && *a->ob_sval == *b->ob_sval
889 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000890}
891
Guido van Rossum9bfef441993-03-29 10:43:31 +0000892static long
Fred Drakeba096332000-07-09 07:04:36 +0000893string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000894{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000895 register int len;
896 register unsigned char *p;
897 register long x;
898
899#ifdef CACHE_HASH
900 if (a->ob_shash != -1)
901 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000902#ifdef INTERN_STRINGS
903 if (a->ob_sinterned != NULL)
904 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000905 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000906#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000907#endif
908 len = a->ob_size;
909 p = (unsigned char *) a->ob_sval;
910 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000911 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000912 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000913 x ^= a->ob_size;
914 if (x == -1)
915 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000916#ifdef CACHE_HASH
917 a->ob_shash = x;
918#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919 return x;
920}
921
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000922static int
Fred Drakeba096332000-07-09 07:04:36 +0000923string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000924{
925 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000926 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000927 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000928 return -1;
929 }
930 *ptr = (void *)self->ob_sval;
931 return self->ob_size;
932}
933
934static int
Fred Drakeba096332000-07-09 07:04:36 +0000935string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000936{
Guido van Rossum045e6881997-09-08 18:30:11 +0000937 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000938 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000939 return -1;
940}
941
942static int
Fred Drakeba096332000-07-09 07:04:36 +0000943string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000944{
945 if ( lenp )
946 *lenp = self->ob_size;
947 return 1;
948}
949
Guido van Rossum1db70701998-10-08 02:18:52 +0000950static int
Fred Drakeba096332000-07-09 07:04:36 +0000951string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000952{
953 if ( index != 0 ) {
954 PyErr_SetString(PyExc_SystemError,
955 "accessing non-existent string segment");
956 return -1;
957 }
958 *ptr = self->ob_sval;
959 return self->ob_size;
960}
961
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000962static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000963 (inquiry)string_length, /*sq_length*/
964 (binaryfunc)string_concat, /*sq_concat*/
965 (intargfunc)string_repeat, /*sq_repeat*/
966 (intargfunc)string_item, /*sq_item*/
967 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000968 0, /*sq_ass_item*/
969 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000970 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000971};
972
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000973static PyBufferProcs string_as_buffer = {
974 (getreadbufferproc)string_buffer_getreadbuf,
975 (getwritebufferproc)string_buffer_getwritebuf,
976 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000977 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000978};
979
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000980
981
/* Selectors telling do_strip() which end(s) of the string to trim. */
enum {
    LEFTSTRIP = 0,      /* trim leading whitespace only */
    RIGHTSTRIP = 1,     /* trim trailing whitespace only */
    BOTHSTRIP = 2       /* trim both ends */
};
985
986
987static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000988split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000989{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000990 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000991 PyObject* item;
992 PyObject *list = PyList_New(0);
993
994 if (list == NULL)
995 return NULL;
996
Guido van Rossum4c08d552000-03-10 22:55:18 +0000997 for (i = j = 0; i < len; ) {
998 while (i < len && isspace(Py_CHARMASK(s[i])))
999 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001000 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001001 while (i < len && !isspace(Py_CHARMASK(s[i])))
1002 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001003 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001004 if (maxsplit-- <= 0)
1005 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1007 if (item == NULL)
1008 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001009 err = PyList_Append(list, item);
1010 Py_DECREF(item);
1011 if (err < 0)
1012 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001013 while (i < len && isspace(Py_CHARMASK(s[i])))
1014 i++;
1015 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001016 }
1017 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001018 if (j < len) {
1019 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1020 if (item == NULL)
1021 goto finally;
1022 err = PyList_Append(list, item);
1023 Py_DECREF(item);
1024 if (err < 0)
1025 goto finally;
1026 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001027 return list;
1028 finally:
1029 Py_DECREF(list);
1030 return NULL;
1031}
1032
1033
/* Docstring for S.split(). */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001041
1042static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001043string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001044{
1045 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001046 int maxsplit = -1;
1047 const char *s = PyString_AS_STRING(self), *sub;
1048 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001049
Guido van Rossum4c08d552000-03-10 22:55:18 +00001050 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001051 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001052 if (maxsplit < 0)
1053 maxsplit = INT_MAX;
1054 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001056 if (PyString_Check(subobj)) {
1057 sub = PyString_AS_STRING(subobj);
1058 n = PyString_GET_SIZE(subobj);
1059 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001060#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001061 else if (PyUnicode_Check(subobj))
1062 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001063#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001064 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1065 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001066 if (n == 0) {
1067 PyErr_SetString(PyExc_ValueError, "empty separator");
1068 return NULL;
1069 }
1070
1071 list = PyList_New(0);
1072 if (list == NULL)
1073 return NULL;
1074
1075 i = j = 0;
1076 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001077 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001078 if (maxsplit-- <= 0)
1079 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001080 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1081 if (item == NULL)
1082 goto fail;
1083 err = PyList_Append(list, item);
1084 Py_DECREF(item);
1085 if (err < 0)
1086 goto fail;
1087 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001088 }
1089 else
1090 i++;
1091 }
1092 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1093 if (item == NULL)
1094 goto fail;
1095 err = PyList_Append(list, item);
1096 Py_DECREF(item);
1097 if (err < 0)
1098 goto fail;
1099
1100 return list;
1101
1102 fail:
1103 Py_DECREF(list);
1104 return NULL;
1105}
1106
1107
/* Docstring for S.join(). */
static char join__doc__[] =
    "S.join(sequence) -> string\n"
    "\n"
    "Return a string which is the concatenation of the strings in the\n"
    "sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113
1114static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001115string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116{
1117 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001118 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001119 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001120 char *p;
1121 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001122 size_t sz = 0;
1123 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001124 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001125
Tim Peters19fe14e2001-01-19 03:03:47 +00001126 seq = PySequence_Fast(orig, "");
1127 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001128 if (PyErr_ExceptionMatches(PyExc_TypeError))
1129 PyErr_Format(PyExc_TypeError,
1130 "sequence expected, %.80s found",
1131 orig->ob_type->tp_name);
1132 return NULL;
1133 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001134
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001135 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001136 if (seqlen == 0) {
1137 Py_DECREF(seq);
1138 return PyString_FromString("");
1139 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001141 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001142 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1143 PyErr_Format(PyExc_TypeError,
1144 "sequence item 0: expected string,"
1145 " %.80s found",
1146 item->ob_type->tp_name);
1147 Py_DECREF(seq);
1148 return NULL;
1149 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001150 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001151 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001152 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001153 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001154
Tim Peters19fe14e2001-01-19 03:03:47 +00001155 /* There are at least two things to join. Do a pre-pass to figure out
1156 * the total amount of space we'll need (sz), see whether any argument
1157 * is absurd, and defer to the Unicode join if appropriate.
1158 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001159 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001160 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001161 item = PySequence_Fast_GET_ITEM(seq, i);
1162 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001163#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001164 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001165 /* Defer to Unicode join.
1166 * CAUTION: There's no gurantee that the
1167 * original sequence can be iterated over
1168 * again, so we must pass seq here.
1169 */
1170 PyObject *result;
1171 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001172 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001173 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001174 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001175#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001176 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001177 "sequence item %i: expected string,"
1178 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001179 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001180 Py_DECREF(seq);
1181 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001182 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001183 sz += PyString_GET_SIZE(item);
1184 if (i != 0)
1185 sz += seplen;
1186 if (sz < old_sz || sz > INT_MAX) {
1187 PyErr_SetString(PyExc_OverflowError,
1188 "join() is too long for a Python string");
1189 Py_DECREF(seq);
1190 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001191 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001192 }
1193
1194 /* Allocate result space. */
1195 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1196 if (res == NULL) {
1197 Py_DECREF(seq);
1198 return NULL;
1199 }
1200
1201 /* Catenate everything. */
1202 p = PyString_AS_STRING(res);
1203 for (i = 0; i < seqlen; ++i) {
1204 size_t n;
1205 item = PySequence_Fast_GET_ITEM(seq, i);
1206 n = PyString_GET_SIZE(item);
1207 memcpy(p, PyString_AS_STRING(item), n);
1208 p += n;
1209 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001210 memcpy(p, sep, seplen);
1211 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001212 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001214
Jeremy Hylton49048292000-07-11 03:28:17 +00001215 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001216 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001217}
1218
Tim Peters52e155e2001-06-16 05:42:57 +00001219PyObject *
1220_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001221{
Tim Petersa7259592001-06-16 05:11:17 +00001222 assert(sep != NULL && PyString_Check(sep));
1223 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001224 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001225}
1226
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001227static long
Fred Drakeba096332000-07-09 07:04:36 +00001228string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001229{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001230 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231 int len = PyString_GET_SIZE(self);
1232 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001233 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001234
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001235 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001236 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001237 return -2;
1238 if (PyString_Check(subobj)) {
1239 sub = PyString_AS_STRING(subobj);
1240 n = PyString_GET_SIZE(subobj);
1241 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001242#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001243 else if (PyUnicode_Check(subobj))
1244 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001245#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001246 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001247 return -2;
1248
1249 if (last > len)
1250 last = len;
1251 if (last < 0)
1252 last += len;
1253 if (last < 0)
1254 last = 0;
1255 if (i < 0)
1256 i += len;
1257 if (i < 0)
1258 i = 0;
1259
Guido van Rossum4c08d552000-03-10 22:55:18 +00001260 if (dir > 0) {
1261 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001262 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001263 last -= n;
1264 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001265 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001266 return (long)i;
1267 }
1268 else {
1269 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001270
Guido van Rossum4c08d552000-03-10 22:55:18 +00001271 if (n == 0 && i <= last)
1272 return (long)last;
1273 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001274 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001275 return (long)j;
1276 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001277
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278 return -1;
1279}
1280
1281
/* Docstring for S.find().  The range is written in slice notation,
   S[start:end], in line with the sentence that follows it and with
   the S.count() docstring. */
static char find__doc__[] =
    "S.find(sub [,start [,end]]) -> int\n"
    "\n"
    "Return the lowest index in S where substring sub is found,\n"
    "such that sub is contained within S[start:end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
1290
1291static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001292string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001293{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001294 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001295 if (result == -2)
1296 return NULL;
1297 return PyInt_FromLong(result);
1298}
1299
1300
/* Docstring for S.index(). */
static char index__doc__[] =
    "S.index(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.find() but raise ValueError when the substring is not found.";
1305
1306static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001307string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001308{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001309 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001310 if (result == -2)
1311 return NULL;
1312 if (result == -1) {
1313 PyErr_SetString(PyExc_ValueError,
1314 "substring not found in string.index");
1315 return NULL;
1316 }
1317 return PyInt_FromLong(result);
1318}
1319
1320
/* Docstring for S.rfind().  The range is written in slice notation,
   S[start:end], in line with the sentence that follows it and with
   the S.count() docstring. */
static char rfind__doc__[] =
    "S.rfind(sub [,start [,end]]) -> int\n"
    "\n"
    "Return the highest index in S where substring sub is found,\n"
    "such that sub is contained within S[start:end]. Optional\n"
    "arguments start and end are interpreted as in slice notation.\n"
    "\n"
    "Return -1 on failure.";
1329
1330static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001331string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 if (result == -2)
1335 return NULL;
1336 return PyInt_FromLong(result);
1337}
1338
1339
/* Docstring for S.rindex(). */
static char rindex__doc__[] =
    "S.rindex(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.rfind() but raise ValueError when the substring is not found.";
1344
1345static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001346string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001348 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001349 if (result == -2)
1350 return NULL;
1351 if (result == -1) {
1352 PyErr_SetString(PyExc_ValueError,
1353 "substring not found in string.rindex");
1354 return NULL;
1355 }
1356 return PyInt_FromLong(result);
1357}
1358
1359
1360static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001361do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001362{
1363 char *s = PyString_AS_STRING(self);
1364 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001366 i = 0;
1367 if (striptype != RIGHTSTRIP) {
1368 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1369 i++;
1370 }
1371 }
1372
1373 j = len;
1374 if (striptype != LEFTSTRIP) {
1375 do {
1376 j--;
1377 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1378 j++;
1379 }
1380
1381 if (i == 0 && j == len) {
1382 Py_INCREF(self);
1383 return (PyObject*)self;
1384 }
1385 else
1386 return PyString_FromStringAndSize(s+i, j-i);
1387}
1388
1389
/* Docstring for S.strip(). */
static char strip__doc__[] =
    "S.strip() -> string\n"
    "\n"
    "Return a copy of the string S with leading and trailing\n"
    "whitespace removed.";
1395
1396static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001397string_strip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001399 return do_strip(self, BOTHSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400}
1401
1402
/* Docstring for S.lstrip(). */
static char lstrip__doc__[] =
    "S.lstrip() -> string\n"
    "\n"
    "Return a copy of the string S with leading whitespace removed.";
1407
1408static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001409string_lstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001410{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001411 return do_strip(self, LEFTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001412}
1413
1414
/* Docstring for S.rstrip(). */
static char rstrip__doc__[] =
    "S.rstrip() -> string\n"
    "\n"
    "Return a copy of the string S with trailing whitespace removed.";
1419
1420static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001421string_rstrip(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422{
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001423 return do_strip(self, RIGHTSTRIP);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424}
1425
1426
/* Docstring for S.lower(). */
static char lower__doc__[] =
    "S.lower() -> string\n"
    "\n"
    "Return a copy of the string S converted to lowercase.";
1431
1432static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001433string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001434{
1435 char *s = PyString_AS_STRING(self), *s_new;
1436 int i, n = PyString_GET_SIZE(self);
1437 PyObject *new;
1438
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 new = PyString_FromStringAndSize(NULL, n);
1440 if (new == NULL)
1441 return NULL;
1442 s_new = PyString_AsString(new);
1443 for (i = 0; i < n; i++) {
1444 int c = Py_CHARMASK(*s++);
1445 if (isupper(c)) {
1446 *s_new = tolower(c);
1447 } else
1448 *s_new = c;
1449 s_new++;
1450 }
1451 return new;
1452}
1453
1454
/* Docstring for S.upper(). */
static char upper__doc__[] =
    "S.upper() -> string\n"
    "\n"
    "Return a copy of the string S converted to uppercase.";
1459
1460static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001461string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001462{
1463 char *s = PyString_AS_STRING(self), *s_new;
1464 int i, n = PyString_GET_SIZE(self);
1465 PyObject *new;
1466
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001467 new = PyString_FromStringAndSize(NULL, n);
1468 if (new == NULL)
1469 return NULL;
1470 s_new = PyString_AsString(new);
1471 for (i = 0; i < n; i++) {
1472 int c = Py_CHARMASK(*s++);
1473 if (islower(c)) {
1474 *s_new = toupper(c);
1475 } else
1476 *s_new = c;
1477 s_new++;
1478 }
1479 return new;
1480}
1481
1482
/* Docstring for S.title(). */
static char title__doc__[] =
    "S.title() -> string\n"
    "\n"
    "Return a titlecased version of S, i.e. words start with uppercase\n"
    "characters, all remaining cased characters have lowercase.";
1488
1489static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001490string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001491{
1492 char *s = PyString_AS_STRING(self), *s_new;
1493 int i, n = PyString_GET_SIZE(self);
1494 int previous_is_cased = 0;
1495 PyObject *new;
1496
Guido van Rossum4c08d552000-03-10 22:55:18 +00001497 new = PyString_FromStringAndSize(NULL, n);
1498 if (new == NULL)
1499 return NULL;
1500 s_new = PyString_AsString(new);
1501 for (i = 0; i < n; i++) {
1502 int c = Py_CHARMASK(*s++);
1503 if (islower(c)) {
1504 if (!previous_is_cased)
1505 c = toupper(c);
1506 previous_is_cased = 1;
1507 } else if (isupper(c)) {
1508 if (previous_is_cased)
1509 c = tolower(c);
1510 previous_is_cased = 1;
1511 } else
1512 previous_is_cased = 0;
1513 *s_new++ = c;
1514 }
1515 return new;
1516}
1517
/* Docstring for S.capitalize(). */
static char capitalize__doc__[] =
    "S.capitalize() -> string\n"
    "\n"
    "Return a copy of the string S with only its first character\n"
    "capitalized.";
1523
1524static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001525string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526{
1527 char *s = PyString_AS_STRING(self), *s_new;
1528 int i, n = PyString_GET_SIZE(self);
1529 PyObject *new;
1530
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531 new = PyString_FromStringAndSize(NULL, n);
1532 if (new == NULL)
1533 return NULL;
1534 s_new = PyString_AsString(new);
1535 if (0 < n) {
1536 int c = Py_CHARMASK(*s++);
1537 if (islower(c))
1538 *s_new = toupper(c);
1539 else
1540 *s_new = c;
1541 s_new++;
1542 }
1543 for (i = 1; i < n; i++) {
1544 int c = Py_CHARMASK(*s++);
1545 if (isupper(c))
1546 *s_new = tolower(c);
1547 else
1548 *s_new = c;
1549 s_new++;
1550 }
1551 return new;
1552}
1553
1554
/* Docstring for S.count(). */
static char count__doc__[] =
    "S.count(sub[, start[, end]]) -> int\n"
    "\n"
    "Return the number of occurrences of substring sub in string\n"
    "S[start:end]. Optional arguments start and end are\n"
    "interpreted as in slice notation.";
1561
1562static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001563string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001565 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001566 int len = PyString_GET_SIZE(self), n;
1567 int i = 0, last = INT_MAX;
1568 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001569 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570
Guido van Rossumc6821402000-05-08 14:08:05 +00001571 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1572 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001574
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575 if (PyString_Check(subobj)) {
1576 sub = PyString_AS_STRING(subobj);
1577 n = PyString_GET_SIZE(subobj);
1578 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001579#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001580 else if (PyUnicode_Check(subobj)) {
1581 int count;
1582 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1583 if (count == -1)
1584 return NULL;
1585 else
1586 return PyInt_FromLong((long) count);
1587 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001588#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001589 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1590 return NULL;
1591
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001592 if (last > len)
1593 last = len;
1594 if (last < 0)
1595 last += len;
1596 if (last < 0)
1597 last = 0;
1598 if (i < 0)
1599 i += len;
1600 if (i < 0)
1601 i = 0;
1602 m = last + 1 - n;
1603 if (n == 0)
1604 return PyInt_FromLong((long) (m-i));
1605
1606 r = 0;
1607 while (i < m) {
1608 if (!memcmp(s+i, sub, n)) {
1609 r++;
1610 i += n;
1611 } else {
1612 i++;
1613 }
1614 }
1615 return PyInt_FromLong((long) r);
1616}
1617
1618
/* Docstring for S.swapcase(). */
static char swapcase__doc__[] =
    "S.swapcase() -> string\n"
    "\n"
    "Return a copy of the string S with uppercase characters\n"
    "converted to lowercase and vice versa.";
1624
1625static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001626string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627{
1628 char *s = PyString_AS_STRING(self), *s_new;
1629 int i, n = PyString_GET_SIZE(self);
1630 PyObject *new;
1631
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632 new = PyString_FromStringAndSize(NULL, n);
1633 if (new == NULL)
1634 return NULL;
1635 s_new = PyString_AsString(new);
1636 for (i = 0; i < n; i++) {
1637 int c = Py_CHARMASK(*s++);
1638 if (islower(c)) {
1639 *s_new = toupper(c);
1640 }
1641 else if (isupper(c)) {
1642 *s_new = tolower(c);
1643 }
1644 else
1645 *s_new = c;
1646 s_new++;
1647 }
1648 return new;
1649}
1650
1651
/* Docstring for S.translate(). */
static char translate__doc__[] =
    "S.translate(table [,deletechars]) -> string\n"
    "\n"
    "Return a copy of the string S, where all characters occurring\n"
    "in the optional argument deletechars are removed, and the\n"
    "remaining characters have been mapped through the given\n"
    "translation table, which must be a string of length 256.";
1659
1660static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001661string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001663 register char *input, *output;
1664 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665 register int i, c, changed = 0;
1666 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001667 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668 int inlen, tablen, dellen = 0;
1669 PyObject *result;
1670 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673 if (!PyArg_ParseTuple(args, "O|O:translate",
1674 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001676
1677 if (PyString_Check(tableobj)) {
1678 table1 = PyString_AS_STRING(tableobj);
1679 tablen = PyString_GET_SIZE(tableobj);
1680 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001681#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001682 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001683 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001684 parameter; instead a mapping to None will cause characters
1685 to be deleted. */
1686 if (delobj != NULL) {
1687 PyErr_SetString(PyExc_TypeError,
1688 "deletions are implemented differently for unicode");
1689 return NULL;
1690 }
1691 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1692 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001693#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001694 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001695 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001696
1697 if (delobj != NULL) {
1698 if (PyString_Check(delobj)) {
1699 del_table = PyString_AS_STRING(delobj);
1700 dellen = PyString_GET_SIZE(delobj);
1701 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001702#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703 else if (PyUnicode_Check(delobj)) {
1704 PyErr_SetString(PyExc_TypeError,
1705 "deletions are implemented differently for unicode");
1706 return NULL;
1707 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001708#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001709 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1710 return NULL;
1711
1712 if (tablen != 256) {
1713 PyErr_SetString(PyExc_ValueError,
1714 "translation table must be 256 characters long");
1715 return NULL;
1716 }
1717 }
1718 else {
1719 del_table = NULL;
1720 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001721 }
1722
1723 table = table1;
1724 inlen = PyString_Size(input_obj);
1725 result = PyString_FromStringAndSize((char *)NULL, inlen);
1726 if (result == NULL)
1727 return NULL;
1728 output_start = output = PyString_AsString(result);
1729 input = PyString_AsString(input_obj);
1730
1731 if (dellen == 0) {
1732 /* If no deletions are required, use faster code */
1733 for (i = inlen; --i >= 0; ) {
1734 c = Py_CHARMASK(*input++);
1735 if (Py_CHARMASK((*output++ = table[c])) != c)
1736 changed = 1;
1737 }
1738 if (changed)
1739 return result;
1740 Py_DECREF(result);
1741 Py_INCREF(input_obj);
1742 return input_obj;
1743 }
1744
1745 for (i = 0; i < 256; i++)
1746 trans_table[i] = Py_CHARMASK(table[i]);
1747
1748 for (i = 0; i < dellen; i++)
1749 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1750
1751 for (i = inlen; --i >= 0; ) {
1752 c = Py_CHARMASK(*input++);
1753 if (trans_table[c] != -1)
1754 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1755 continue;
1756 changed = 1;
1757 }
1758 if (!changed) {
1759 Py_DECREF(result);
1760 Py_INCREF(input_obj);
1761 return input_obj;
1762 }
1763 /* Fix the size of the resulting string */
1764 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1765 return NULL;
1766 return result;
1767}
1768
1769
1770/* What follows is used for implementing replace(). Perry Stoll. */
1771
/*
  mymemfind

  memmem()-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  if there is none.  When PAT_LEN exceeds LEN no position is examined
  and -1 is returned.  PAT[0] is read at every candidate position.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest index at which a full copy of PAT still fits in MEM;
       negative when PAT is longer than MEM. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Compare the first byte before paying for memcmp(). */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1797
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right and resuming just past each match.  For example
  mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    while (len >= 0) {
        const int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* Skip the unmatched prefix plus the match itself. */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1821
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means no limit.

   If STR is empty, if the length of PAT is greater than the length of
   STR, or if there are no occurrences of PAT in STR to replace, then
   the original string is returned.  Otherwise, a new string is
   allocated here (with PyMem_MALLOC) and returned; the caller must
   release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would
       be longer than INT_MAX bytes).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Each replacement changes the length by delta.  Only growth can
       overflow: when delta <= 0 the result is no longer than the input
       and cannot go negative because the nfound matches fit in STR. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound * delta;

    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1906
1907
1908static char replace__doc__[] =
1909"S.replace (old, new[, maxsplit]) -> string\n\
1910\n\
1911Return a copy of string S with all occurrences of substring\n\
1912old replaced by new. If the optional argument maxsplit is\n\
1913given, only the first maxsplit occurrences are replaced.";
1914
1915static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001916string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001917{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001918 const char *str = PyString_AS_STRING(self), *sub, *repl;
1919 char *new_s;
1920 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1921 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001922 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001923 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001924
Guido van Rossum4c08d552000-03-10 22:55:18 +00001925 if (!PyArg_ParseTuple(args, "OO|i:replace",
1926 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001927 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001928
1929 if (PyString_Check(subobj)) {
1930 sub = PyString_AS_STRING(subobj);
1931 sub_len = PyString_GET_SIZE(subobj);
1932 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001933#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001934 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001935 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001936 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001937#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001938 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1939 return NULL;
1940
1941 if (PyString_Check(replobj)) {
1942 repl = PyString_AS_STRING(replobj);
1943 repl_len = PyString_GET_SIZE(replobj);
1944 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001945#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001946 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001947 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001948 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001949#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001950 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1951 return NULL;
1952
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001953 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001954 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001955 return NULL;
1956 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001957 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001958 if (new_s == NULL) {
1959 PyErr_NoMemory();
1960 return NULL;
1961 }
1962 if (out_len == -1) {
1963 /* we're returning another reference to self */
1964 new = (PyObject*)self;
1965 Py_INCREF(new);
1966 }
1967 else {
1968 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001969 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001970 }
1971 return new;
1972}
1973
1974
1975static char startswith__doc__[] =
1976"S.startswith(prefix[, start[, end]]) -> int\n\
1977\n\
1978Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1979optional start, test S beginning at that position. With optional end, stop\n\
1980comparing S at that position.";
1981
1982static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001983string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001984{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001985 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001986 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001987 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001988 int plen;
1989 int start = 0;
1990 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001991 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001992
Guido van Rossumc6821402000-05-08 14:08:05 +00001993 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1994 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001995 return NULL;
1996 if (PyString_Check(subobj)) {
1997 prefix = PyString_AS_STRING(subobj);
1998 plen = PyString_GET_SIZE(subobj);
1999 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002000#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002001 else if (PyUnicode_Check(subobj)) {
2002 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002003 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002004 subobj, start, end, -1);
2005 if (rc == -1)
2006 return NULL;
2007 else
2008 return PyInt_FromLong((long) rc);
2009 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002010#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002011 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002012 return NULL;
2013
2014 /* adopt Java semantics for index out of range. it is legal for
2015 * offset to be == plen, but this only returns true if prefix is
2016 * the empty string.
2017 */
2018 if (start < 0 || start+plen > len)
2019 return PyInt_FromLong(0);
2020
2021 if (!memcmp(str+start, prefix, plen)) {
2022 /* did the match end after the specified end? */
2023 if (end < 0)
2024 return PyInt_FromLong(1);
2025 else if (end - start < plen)
2026 return PyInt_FromLong(0);
2027 else
2028 return PyInt_FromLong(1);
2029 }
2030 else return PyInt_FromLong(0);
2031}
2032
2033
2034static char endswith__doc__[] =
2035"S.endswith(suffix[, start[, end]]) -> int\n\
2036\n\
2037Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
2038optional start, test S beginning at that position. With optional end, stop\n\
2039comparing S at that position.";
2040
2041static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002042string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002043{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002045 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002046 const char* suffix;
2047 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 int start = 0;
2049 int end = -1;
2050 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002051 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002052
Guido van Rossumc6821402000-05-08 14:08:05 +00002053 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2054 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002055 return NULL;
2056 if (PyString_Check(subobj)) {
2057 suffix = PyString_AS_STRING(subobj);
2058 slen = PyString_GET_SIZE(subobj);
2059 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002060#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002061 else if (PyUnicode_Check(subobj)) {
2062 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002063 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002064 subobj, start, end, +1);
2065 if (rc == -1)
2066 return NULL;
2067 else
2068 return PyInt_FromLong((long) rc);
2069 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002070#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002071 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002072 return NULL;
2073
Guido van Rossum4c08d552000-03-10 22:55:18 +00002074 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002075 return PyInt_FromLong(0);
2076
2077 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002078 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079
Guido van Rossum4c08d552000-03-10 22:55:18 +00002080 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002081 return PyInt_FromLong(1);
2082 else return PyInt_FromLong(0);
2083}
2084
2085
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002086static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002087"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002088\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002089Encodes S using the codec registered for encoding. encoding defaults\n\
2090to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002091handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2092a ValueError. Other possible values are 'ignore' and 'replace'.";
2093
2094static PyObject *
2095string_encode(PyStringObject *self, PyObject *args)
2096{
2097 char *encoding = NULL;
2098 char *errors = NULL;
2099 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2100 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002101 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2102}
2103
2104
2105static char decode__doc__[] =
2106"S.decode([encoding[,errors]]) -> object\n\
2107\n\
2108Decodes S using the codec registered for encoding. encoding defaults\n\
2109to the default encoding. errors may be given to set a different error\n\
2110handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2111a ValueError. Other possible values are 'ignore' and 'replace'.";
2112
2113static PyObject *
2114string_decode(PyStringObject *self, PyObject *args)
2115{
2116 char *encoding = NULL;
2117 char *errors = NULL;
2118 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2119 return NULL;
2120 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002121}
2122
2123
Guido van Rossum4c08d552000-03-10 22:55:18 +00002124static char expandtabs__doc__[] =
2125"S.expandtabs([tabsize]) -> string\n\
2126\n\
2127Return a copy of S where all tab characters are expanded using spaces.\n\
2128If tabsize is not given, a tab size of 8 characters is assumed.";
2129
2130static PyObject*
2131string_expandtabs(PyStringObject *self, PyObject *args)
2132{
2133 const char *e, *p;
2134 char *q;
2135 int i, j;
2136 PyObject *u;
2137 int tabsize = 8;
2138
2139 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2140 return NULL;
2141
Thomas Wouters7e474022000-07-16 12:04:32 +00002142 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002143 i = j = 0;
2144 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2145 for (p = PyString_AS_STRING(self); p < e; p++)
2146 if (*p == '\t') {
2147 if (tabsize > 0)
2148 j += tabsize - (j % tabsize);
2149 }
2150 else {
2151 j++;
2152 if (*p == '\n' || *p == '\r') {
2153 i += j;
2154 j = 0;
2155 }
2156 }
2157
2158 /* Second pass: create output string and fill it */
2159 u = PyString_FromStringAndSize(NULL, i + j);
2160 if (!u)
2161 return NULL;
2162
2163 j = 0;
2164 q = PyString_AS_STRING(u);
2165
2166 for (p = PyString_AS_STRING(self); p < e; p++)
2167 if (*p == '\t') {
2168 if (tabsize > 0) {
2169 i = tabsize - (j % tabsize);
2170 j += i;
2171 while (i--)
2172 *q++ = ' ';
2173 }
2174 }
2175 else {
2176 j++;
2177 *q++ = *p;
2178 if (*p == '\n' || *p == '\r')
2179 j = 0;
2180 }
2181
2182 return u;
2183}
2184
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002185static
2186PyObject *pad(PyStringObject *self,
2187 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188 int right,
2189 char fill)
2190{
2191 PyObject *u;
2192
2193 if (left < 0)
2194 left = 0;
2195 if (right < 0)
2196 right = 0;
2197
2198 if (left == 0 && right == 0) {
2199 Py_INCREF(self);
2200 return (PyObject *)self;
2201 }
2202
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002203 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002204 left + PyString_GET_SIZE(self) + right);
2205 if (u) {
2206 if (left)
2207 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002208 memcpy(PyString_AS_STRING(u) + left,
2209 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002210 PyString_GET_SIZE(self));
2211 if (right)
2212 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2213 fill, right);
2214 }
2215
2216 return u;
2217}
2218
2219static char ljust__doc__[] =
2220"S.ljust(width) -> string\n\
2221\n\
2222Return S left justified in a string of length width. Padding is\n\
2223done using spaces.";
2224
2225static PyObject *
2226string_ljust(PyStringObject *self, PyObject *args)
2227{
2228 int width;
2229 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2230 return NULL;
2231
2232 if (PyString_GET_SIZE(self) >= width) {
2233 Py_INCREF(self);
2234 return (PyObject*) self;
2235 }
2236
2237 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2238}
2239
2240
2241static char rjust__doc__[] =
2242"S.rjust(width) -> string\n\
2243\n\
2244Return S right justified in a string of length width. Padding is\n\
2245done using spaces.";
2246
2247static PyObject *
2248string_rjust(PyStringObject *self, PyObject *args)
2249{
2250 int width;
2251 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2252 return NULL;
2253
2254 if (PyString_GET_SIZE(self) >= width) {
2255 Py_INCREF(self);
2256 return (PyObject*) self;
2257 }
2258
2259 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2260}
2261
2262
2263static char center__doc__[] =
2264"S.center(width) -> string\n\
2265\n\
2266Return S centered in a string of length width. Padding is done\n\
2267using spaces.";
2268
2269static PyObject *
2270string_center(PyStringObject *self, PyObject *args)
2271{
2272 int marg, left;
2273 int width;
2274
2275 if (!PyArg_ParseTuple(args, "i:center", &width))
2276 return NULL;
2277
2278 if (PyString_GET_SIZE(self) >= width) {
2279 Py_INCREF(self);
2280 return (PyObject*) self;
2281 }
2282
2283 marg = width - PyString_GET_SIZE(self);
2284 left = marg / 2 + (marg & width & 1);
2285
2286 return pad(self, left, marg - left, ' ');
2287}
2288
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (compiled out by the
   surrounding #if 0): left-pad with '0' up to width, keeping a
   leading '+' or '-' sign in front of the inserted zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int nzeros;
    PyObject *padded;
    char *buf;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (PyString_GET_SIZE(self) >= width) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    nzeros = width - PyString_GET_SIZE(self);

    padded = pad(self, nzeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* buf[nzeros] is the first byte of the original text. */
    buf = PyString_AS_STRING(padded);
    if (buf[nzeros] == '+' || buf[nzeros] == '-') {
        /* move sign to beginning of string */
        buf[0] = buf[nzeros];
        buf[nzeros] = '0';
    }

    return padded;
}
#endif
2328
2329static char isspace__doc__[] =
2330"S.isspace() -> int\n\
2331\n\
2332Return 1 if there are only whitespace characters in S,\n\
23330 otherwise.";
2334
2335static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002336string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337{
Fred Drakeba096332000-07-09 07:04:36 +00002338 register const unsigned char *p
2339 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002340 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341
Guido van Rossum4c08d552000-03-10 22:55:18 +00002342 /* Shortcut for single character strings */
2343 if (PyString_GET_SIZE(self) == 1 &&
2344 isspace(*p))
2345 return PyInt_FromLong(1);
2346
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002347 /* Special case for empty strings */
2348 if (PyString_GET_SIZE(self) == 0)
2349 return PyInt_FromLong(0);
2350
Guido van Rossum4c08d552000-03-10 22:55:18 +00002351 e = p + PyString_GET_SIZE(self);
2352 for (; p < e; p++) {
2353 if (!isspace(*p))
2354 return PyInt_FromLong(0);
2355 }
2356 return PyInt_FromLong(1);
2357}
2358
2359
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002360static char isalpha__doc__[] =
2361"S.isalpha() -> int\n\
2362\n\
2363Return 1 if all characters in S are alphabetic\n\
2364and there is at least one character in S, 0 otherwise.";
2365
2366static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002367string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002368{
Fred Drakeba096332000-07-09 07:04:36 +00002369 register const unsigned char *p
2370 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002371 register const unsigned char *e;
2372
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002373 /* Shortcut for single character strings */
2374 if (PyString_GET_SIZE(self) == 1 &&
2375 isalpha(*p))
2376 return PyInt_FromLong(1);
2377
2378 /* Special case for empty strings */
2379 if (PyString_GET_SIZE(self) == 0)
2380 return PyInt_FromLong(0);
2381
2382 e = p + PyString_GET_SIZE(self);
2383 for (; p < e; p++) {
2384 if (!isalpha(*p))
2385 return PyInt_FromLong(0);
2386 }
2387 return PyInt_FromLong(1);
2388}
2389
2390
2391static char isalnum__doc__[] =
2392"S.isalnum() -> int\n\
2393\n\
2394Return 1 if all characters in S are alphanumeric\n\
2395and there is at least one character in S, 0 otherwise.";
2396
2397static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002398string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002399{
Fred Drakeba096332000-07-09 07:04:36 +00002400 register const unsigned char *p
2401 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002402 register const unsigned char *e;
2403
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002404 /* Shortcut for single character strings */
2405 if (PyString_GET_SIZE(self) == 1 &&
2406 isalnum(*p))
2407 return PyInt_FromLong(1);
2408
2409 /* Special case for empty strings */
2410 if (PyString_GET_SIZE(self) == 0)
2411 return PyInt_FromLong(0);
2412
2413 e = p + PyString_GET_SIZE(self);
2414 for (; p < e; p++) {
2415 if (!isalnum(*p))
2416 return PyInt_FromLong(0);
2417 }
2418 return PyInt_FromLong(1);
2419}
2420
2421
Guido van Rossum4c08d552000-03-10 22:55:18 +00002422static char isdigit__doc__[] =
2423"S.isdigit() -> int\n\
2424\n\
2425Return 1 if there are only digit characters in S,\n\
24260 otherwise.";
2427
2428static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002429string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002430{
Fred Drakeba096332000-07-09 07:04:36 +00002431 register const unsigned char *p
2432 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002433 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002434
Guido van Rossum4c08d552000-03-10 22:55:18 +00002435 /* Shortcut for single character strings */
2436 if (PyString_GET_SIZE(self) == 1 &&
2437 isdigit(*p))
2438 return PyInt_FromLong(1);
2439
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002440 /* Special case for empty strings */
2441 if (PyString_GET_SIZE(self) == 0)
2442 return PyInt_FromLong(0);
2443
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 e = p + PyString_GET_SIZE(self);
2445 for (; p < e; p++) {
2446 if (!isdigit(*p))
2447 return PyInt_FromLong(0);
2448 }
2449 return PyInt_FromLong(1);
2450}
2451
2452
2453static char islower__doc__[] =
2454"S.islower() -> int\n\
2455\n\
2456Return 1 if all cased characters in S are lowercase and there is\n\
2457at least one cased character in S, 0 otherwise.";
2458
2459static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002460string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002461{
Fred Drakeba096332000-07-09 07:04:36 +00002462 register const unsigned char *p
2463 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002464 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 int cased;
2466
Guido van Rossum4c08d552000-03-10 22:55:18 +00002467 /* Shortcut for single character strings */
2468 if (PyString_GET_SIZE(self) == 1)
2469 return PyInt_FromLong(islower(*p) != 0);
2470
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002471 /* Special case for empty strings */
2472 if (PyString_GET_SIZE(self) == 0)
2473 return PyInt_FromLong(0);
2474
Guido van Rossum4c08d552000-03-10 22:55:18 +00002475 e = p + PyString_GET_SIZE(self);
2476 cased = 0;
2477 for (; p < e; p++) {
2478 if (isupper(*p))
2479 return PyInt_FromLong(0);
2480 else if (!cased && islower(*p))
2481 cased = 1;
2482 }
2483 return PyInt_FromLong(cased);
2484}
2485
2486
2487static char isupper__doc__[] =
2488"S.isupper() -> int\n\
2489\n\
2490Return 1 if all cased characters in S are uppercase and there is\n\
2491at least one cased character in S, 0 otherwise.";
2492
2493static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002494string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002495{
Fred Drakeba096332000-07-09 07:04:36 +00002496 register const unsigned char *p
2497 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002498 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002499 int cased;
2500
Guido van Rossum4c08d552000-03-10 22:55:18 +00002501 /* Shortcut for single character strings */
2502 if (PyString_GET_SIZE(self) == 1)
2503 return PyInt_FromLong(isupper(*p) != 0);
2504
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002505 /* Special case for empty strings */
2506 if (PyString_GET_SIZE(self) == 0)
2507 return PyInt_FromLong(0);
2508
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509 e = p + PyString_GET_SIZE(self);
2510 cased = 0;
2511 for (; p < e; p++) {
2512 if (islower(*p))
2513 return PyInt_FromLong(0);
2514 else if (!cased && isupper(*p))
2515 cased = 1;
2516 }
2517 return PyInt_FromLong(cased);
2518}
2519
2520
2521static char istitle__doc__[] =
2522"S.istitle() -> int\n\
2523\n\
2524Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2525may only follow uncased characters and lowercase characters only cased\n\
2526ones. Return 0 otherwise.";
2527
2528static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002529string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530{
Fred Drakeba096332000-07-09 07:04:36 +00002531 register const unsigned char *p
2532 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002533 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002534 int cased, previous_is_cased;
2535
Guido van Rossum4c08d552000-03-10 22:55:18 +00002536 /* Shortcut for single character strings */
2537 if (PyString_GET_SIZE(self) == 1)
2538 return PyInt_FromLong(isupper(*p) != 0);
2539
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002540 /* Special case for empty strings */
2541 if (PyString_GET_SIZE(self) == 0)
2542 return PyInt_FromLong(0);
2543
Guido van Rossum4c08d552000-03-10 22:55:18 +00002544 e = p + PyString_GET_SIZE(self);
2545 cased = 0;
2546 previous_is_cased = 0;
2547 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002548 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002549
2550 if (isupper(ch)) {
2551 if (previous_is_cased)
2552 return PyInt_FromLong(0);
2553 previous_is_cased = 1;
2554 cased = 1;
2555 }
2556 else if (islower(ch)) {
2557 if (!previous_is_cased)
2558 return PyInt_FromLong(0);
2559 previous_is_cased = 1;
2560 cased = 1;
2561 }
2562 else
2563 previous_is_cased = 0;
2564 }
2565 return PyInt_FromLong(cased);
2566}
2567
2568
/* Docstring for S.splitlines(); the optional argument is shown in
   balanced square brackets. */
static char splitlines__doc__[] =
"S.splitlines([keepends]) -> list of strings\n\
\n\
Return a list of the lines in S, breaking at line boundaries.\n\
Line breaks are not included in the resulting list unless keepends\n\
is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002575
2576#define SPLIT_APPEND(data, left, right) \
2577 str = PyString_FromStringAndSize(data + left, right - left); \
2578 if (!str) \
2579 goto onError; \
2580 if (PyList_Append(list, str)) { \
2581 Py_DECREF(str); \
2582 goto onError; \
2583 } \
2584 else \
2585 Py_DECREF(str);
2586
2587static PyObject*
2588string_splitlines(PyStringObject *self, PyObject *args)
2589{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 register int i;
2591 register int j;
2592 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002593 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594 PyObject *list;
2595 PyObject *str;
2596 char *data;
2597
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002598 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 return NULL;
2600
2601 data = PyString_AS_STRING(self);
2602 len = PyString_GET_SIZE(self);
2603
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 list = PyList_New(0);
2605 if (!list)
2606 goto onError;
2607
2608 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002609 int eol;
2610
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611 /* Find a line and append it */
2612 while (i < len && data[i] != '\n' && data[i] != '\r')
2613 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614
2615 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002616 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617 if (i < len) {
2618 if (data[i] == '\r' && i + 1 < len &&
2619 data[i+1] == '\n')
2620 i += 2;
2621 else
2622 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002623 if (keepends)
2624 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002625 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002626 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002627 j = i;
2628 }
2629 if (j < len) {
2630 SPLIT_APPEND(data, j, len);
2631 }
2632
2633 return list;
2634
2635 onError:
2636 Py_DECREF(list);
2637 return NULL;
2638}
2639
2640#undef SPLIT_APPEND
2641
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002642
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002643static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002644string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002645 /* Counterparts of the obsolete stropmodule functions; except
2646 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002647 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2648 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2649 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2650 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2651 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2652 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2653 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2654 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2655 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2656 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2657 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2658 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2659 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2660 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2661 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2662 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2663 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2664 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2665 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2666 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2667 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2668 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2669 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2670 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2671 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2672 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2673 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2674 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2675 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2676 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2677 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2678 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2679 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002680#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002681 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002682#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002683 {NULL, NULL} /* sentinel */
2684};
2685
Guido van Rossumae960af2001-08-30 03:11:59 +00002686staticforward PyObject *
2687str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2688
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002689static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002690string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002691{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002692 PyObject *x = NULL;
2693 static char *kwlist[] = {"object", 0};
2694
Guido van Rossumae960af2001-08-30 03:11:59 +00002695 if (type != &PyString_Type)
2696 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002697 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2698 return NULL;
2699 if (x == NULL)
2700 return PyString_FromString("");
2701 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002702}
2703
Guido van Rossumae960af2001-08-30 03:11:59 +00002704static PyObject *
2705str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2706{
2707 PyObject *tmp, *new;
2708 int n;
2709
2710 assert(PyType_IsSubtype(type, &PyString_Type));
2711 tmp = string_new(&PyString_Type, args, kwds);
2712 if (tmp == NULL)
2713 return NULL;
2714 assert(PyString_Check(tmp));
2715 new = type->tp_alloc(type, n = PyString_GET_SIZE(tmp));
Guido van Rossum29d55a32001-08-31 16:11:15 +00002716 if (new != NULL)
2717 memcpy(PyString_AS_STRING(new), PyString_AS_STRING(tmp), n+1);
2718 Py_DECREF(tmp);
Guido van Rossumae960af2001-08-30 03:11:59 +00002719 return new;
2720}
2721
/* Docstring of the str type (tp_doc). */
static char string_doc[] =
"str(object) -> string\n"
"\n"
"Return a nice string representation of the object.\n"
"If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002727
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002728PyTypeObject PyString_Type = {
2729 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002730 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002731 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002732 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002733 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002734 (destructor)string_dealloc, /* tp_dealloc */
2735 (printfunc)string_print, /* tp_print */
2736 0, /* tp_getattr */
2737 0, /* tp_setattr */
2738 0, /* tp_compare */
2739 (reprfunc)string_repr, /* tp_repr */
2740 0, /* tp_as_number */
2741 &string_as_sequence, /* tp_as_sequence */
2742 0, /* tp_as_mapping */
2743 (hashfunc)string_hash, /* tp_hash */
2744 0, /* tp_call */
2745 (reprfunc)string_str, /* tp_str */
2746 PyObject_GenericGetAttr, /* tp_getattro */
2747 0, /* tp_setattro */
2748 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002749 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002750 string_doc, /* tp_doc */
2751 0, /* tp_traverse */
2752 0, /* tp_clear */
2753 (richcmpfunc)string_richcompare, /* tp_richcompare */
2754 0, /* tp_weaklistoffset */
2755 0, /* tp_iter */
2756 0, /* tp_iternext */
2757 string_methods, /* tp_methods */
2758 0, /* tp_members */
2759 0, /* tp_getset */
2760 0, /* tp_base */
2761 0, /* tp_dict */
2762 0, /* tp_descr_get */
2763 0, /* tp_descr_set */
2764 0, /* tp_dictoffset */
2765 0, /* tp_init */
2766 0, /* tp_alloc */
2767 string_new, /* tp_new */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002768};
2769
2770void
Fred Drakeba096332000-07-09 07:04:36 +00002771PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002772{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002773 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002774 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002775 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002776 if (w == NULL || !PyString_Check(*pv)) {
2777 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002778 *pv = NULL;
2779 return;
2780 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002781 v = string_concat((PyStringObject *) *pv, w);
2782 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002783 *pv = v;
2784}
2785
Guido van Rossum013142a1994-08-30 08:19:36 +00002786void
Fred Drakeba096332000-07-09 07:04:36 +00002787PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002788{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002789 PyString_Concat(pv, w);
2790 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002791}
2792
2793
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002794/* The following function breaks the notion that strings are immutable:
2795 it changes the size of a string. We get away with this only if there
2796 is only one module referencing the object. You can also think of it
2797 as creating a new string object and destroying the old one, only
2798 more efficiently. In any case, don't use this if the string may
2799 already be known to some other part of the code... */
2800
2801int
Fred Drakeba096332000-07-09 07:04:36 +00002802_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002803{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002804 register PyObject *v;
2805 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002806 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002807 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002808 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002809 Py_DECREF(v);
2810 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002811 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002812 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002813 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002814#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002815 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002816#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002817 _Py_ForgetReference(v);
2818 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002819 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002820 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002821 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002822 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002823 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002824 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002825 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002826 _Py_NewReference(*pv);
2827 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002828 sv->ob_size = newsize;
2829 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002830 return 0;
2831}
Guido van Rossume5372401993-03-16 12:15:04 +00002832
2833/* Helpers for formatstring */
2834
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002835static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002836getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002837{
2838 int argidx = *p_argidx;
2839 if (argidx < arglen) {
2840 (*p_argidx)++;
2841 if (arglen < 0)
2842 return args;
2843 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002844 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002845 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002846 PyErr_SetString(PyExc_TypeError,
2847 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002848 return NULL;
2849}
2850
/* Bit flags recording which format-specifier flag characters were seen:
 *	F_LJUST	'-'
 *	F_SIGN	'+'
 *	F_BLANK	' '
 *	F_ALT	'#'
 *	F_ZERO	'0'
 */
#define F_LJUST	0x01
#define F_SIGN	0x02
#define F_BLANK	0x04
#define F_ALT	0x08
#define F_ZERO	0x10
2863
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002864static int
Fred Drakeba096332000-07-09 07:04:36 +00002865formatfloat(char *buf, size_t buflen, int flags,
2866 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002867{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002868 /* fmt = '%#.' + `prec` + `type`
2869 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002870 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002871 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002872 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002873 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002874 if (prec < 0)
2875 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002876 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2877 type = 'g';
2878 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002879 /* worst case length calc to ensure no buffer overrun:
2880 fmt = %#.<prec>g
2881 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002882 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002883 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2884 If prec=0 the effective precision is 1 (the leading digit is
2885 always given), therefore increase by one to 10+prec. */
2886 if (buflen <= (size_t)10 + (size_t)prec) {
2887 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002888 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002889 return -1;
2890 }
Guido van Rossume5372401993-03-16 12:15:04 +00002891 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002892 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002893}
2894
Tim Peters38fd5b62000-09-21 05:43:11 +00002895/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2896 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2897 * Python's regular ints.
2898 * Return value: a new PyString*, or NULL if error.
2899 * . *pbuf is set to point into it,
2900 * *plen set to the # of chars following that.
2901 * Caller must decref it when done using pbuf.
2902 * The string starting at *pbuf is of the form
2903 * "-"? ("0x" | "0X")? digit+
2904 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002905 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002906 * There will be at least prec digits, zero-filled on the left if
2907 * necessary to get that many.
2908 * val object to be converted
2909 * flags bitmask of format flags; only F_ALT is looked at
2910 * prec minimum number of digits; 0-fill on left if needed
2911 * type a character in [duoxX]; u acts the same as d
2912 *
2913 * CAUTION: o, x and X conversions on regular ints can never
2914 * produce a '-' sign, but can for Python's unbounded ints.
2915 */
2916PyObject*
2917_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2918 char **pbuf, int *plen)
2919{
2920 PyObject *result = NULL;
2921 char *buf;
2922 int i;
2923 int sign; /* 1 if '-', else 0 */
2924 int len; /* number of characters */
2925 int numdigits; /* len == numnondigits + numdigits */
2926 int numnondigits = 0;
2927
2928 switch (type) {
2929 case 'd':
2930 case 'u':
2931 result = val->ob_type->tp_str(val);
2932 break;
2933 case 'o':
2934 result = val->ob_type->tp_as_number->nb_oct(val);
2935 break;
2936 case 'x':
2937 case 'X':
2938 numnondigits = 2;
2939 result = val->ob_type->tp_as_number->nb_hex(val);
2940 break;
2941 default:
2942 assert(!"'type' not in [duoxX]");
2943 }
2944 if (!result)
2945 return NULL;
2946
2947 /* To modify the string in-place, there can only be one reference. */
2948 if (result->ob_refcnt != 1) {
2949 PyErr_BadInternalCall();
2950 return NULL;
2951 }
2952 buf = PyString_AsString(result);
2953 len = PyString_Size(result);
2954 if (buf[len-1] == 'L') {
2955 --len;
2956 buf[len] = '\0';
2957 }
2958 sign = buf[0] == '-';
2959 numnondigits += sign;
2960 numdigits = len - numnondigits;
2961 assert(numdigits > 0);
2962
Tim Petersfff53252001-04-12 18:38:48 +00002963 /* Get rid of base marker unless F_ALT */
2964 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002965 /* Need to skip 0x, 0X or 0. */
2966 int skipped = 0;
2967 switch (type) {
2968 case 'o':
2969 assert(buf[sign] == '0');
2970 /* If 0 is only digit, leave it alone. */
2971 if (numdigits > 1) {
2972 skipped = 1;
2973 --numdigits;
2974 }
2975 break;
2976 case 'x':
2977 case 'X':
2978 assert(buf[sign] == '0');
2979 assert(buf[sign + 1] == 'x');
2980 skipped = 2;
2981 numnondigits -= 2;
2982 break;
2983 }
2984 if (skipped) {
2985 buf += skipped;
2986 len -= skipped;
2987 if (sign)
2988 buf[0] = '-';
2989 }
2990 assert(len == numnondigits + numdigits);
2991 assert(numdigits > 0);
2992 }
2993
2994 /* Fill with leading zeroes to meet minimum width. */
2995 if (prec > numdigits) {
2996 PyObject *r1 = PyString_FromStringAndSize(NULL,
2997 numnondigits + prec);
2998 char *b1;
2999 if (!r1) {
3000 Py_DECREF(result);
3001 return NULL;
3002 }
3003 b1 = PyString_AS_STRING(r1);
3004 for (i = 0; i < numnondigits; ++i)
3005 *b1++ = *buf++;
3006 for (i = 0; i < prec - numdigits; i++)
3007 *b1++ = '0';
3008 for (i = 0; i < numdigits; i++)
3009 *b1++ = *buf++;
3010 *b1 = '\0';
3011 Py_DECREF(result);
3012 result = r1;
3013 buf = PyString_AS_STRING(result);
3014 len = numnondigits + prec;
3015 }
3016
3017 /* Fix up case for hex conversions. */
3018 switch (type) {
3019 case 'x':
3020 /* Need to convert all upper case letters to lower case. */
3021 for (i = 0; i < len; i++)
3022 if (buf[i] >= 'A' && buf[i] <= 'F')
3023 buf[i] += 'a'-'A';
3024 break;
3025 case 'X':
3026 /* Need to convert 0x to 0X (and -0x to -0X). */
3027 if (buf[sign + 1] == 'x')
3028 buf[sign + 1] = 'X';
3029 break;
3030 }
3031 *pbuf = buf;
3032 *plen = len;
3033 return result;
3034}
3035
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003036static int
Fred Drakeba096332000-07-09 07:04:36 +00003037formatint(char *buf, size_t buflen, int flags,
3038 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003039{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003040 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003041 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3042 + 1 + 1 = 24 */
3043 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003044 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003045 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003046 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003047 if (prec < 0)
3048 prec = 1;
3049 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00003050 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003051 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003052 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003053 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003054 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003055 return -1;
3056 }
Guido van Rossume5372401993-03-16 12:15:04 +00003057 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00003058 /* When converting 0 under %#x or %#X, C leaves off the base marker,
3059 * but we want it (for consistency with other %#x conversions, and
3060 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003061 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
3062 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
3063 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00003064 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003065 if (x == 0 &&
3066 (flags & F_ALT) &&
3067 (type == 'x' || type == 'X') &&
3068 buf[1] != (char)type) /* this last always true under std C */
3069 {
Tim Petersfff53252001-04-12 18:38:48 +00003070 memmove(buf+2, buf, strlen(buf) + 1);
3071 buf[0] = '0';
3072 buf[1] = (char)type;
3073 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003074 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003075}
3076
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003077static int
Fred Drakeba096332000-07-09 07:04:36 +00003078formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003079{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003080 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003081 if (PyString_Check(v)) {
3082 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003083 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003084 }
3085 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003086 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003087 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003088 }
3089 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003090 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003091}
3092
Guido van Rossum013142a1994-08-30 08:19:36 +00003093
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it stays a single
   operand wherever the macro is used.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003103
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003104PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003105PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003106{
3107 char *fmt, *res;
3108 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003109 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003110 PyObject *result, *orig_args;
3111#ifdef Py_USING_UNICODE
3112 PyObject *v, *w;
3113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003114 PyObject *dict = NULL;
3115 if (format == NULL || !PyString_Check(format) || args == NULL) {
3116 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003117 return NULL;
3118 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003119 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003120 fmt = PyString_AsString(format);
3121 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003122 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003123 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003124 if (result == NULL)
3125 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003126 res = PyString_AsString(result);
3127 if (PyTuple_Check(args)) {
3128 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003129 argidx = 0;
3130 }
3131 else {
3132 arglen = -1;
3133 argidx = -2;
3134 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003135 if (args->ob_type->tp_as_mapping)
3136 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003137 while (--fmtcnt >= 0) {
3138 if (*fmt != '%') {
3139 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003140 rescnt = fmtcnt + 100;
3141 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003142 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003143 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003144 res = PyString_AsString(result)
3145 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003146 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003147 }
3148 *res++ = *fmt++;
3149 }
3150 else {
3151 /* Got a format specifier */
3152 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003153 int width = -1;
3154 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003155 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003156 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003157 PyObject *v = NULL;
3158 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003159 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003160 int sign;
3161 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003162 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003163#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003164 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003165 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003166#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003167
Guido van Rossumda9c2711996-12-05 21:58:58 +00003168 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003169 if (*fmt == '(') {
3170 char *keystart;
3171 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003172 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003173 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003174
3175 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003176 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003177 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003178 goto error;
3179 }
3180 ++fmt;
3181 --fmtcnt;
3182 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003183 /* Skip over balanced parentheses */
3184 while (pcount > 0 && --fmtcnt >= 0) {
3185 if (*fmt == ')')
3186 --pcount;
3187 else if (*fmt == '(')
3188 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003189 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003190 }
3191 keylen = fmt - keystart - 1;
3192 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003193 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003194 "incomplete format key");
3195 goto error;
3196 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003197 key = PyString_FromStringAndSize(keystart,
3198 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003199 if (key == NULL)
3200 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003201 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003202 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003203 args_owned = 0;
3204 }
3205 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003206 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003207 if (args == NULL) {
3208 goto error;
3209 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003210 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003211 arglen = -1;
3212 argidx = -2;
3213 }
Guido van Rossume5372401993-03-16 12:15:04 +00003214 while (--fmtcnt >= 0) {
3215 switch (c = *fmt++) {
3216 case '-': flags |= F_LJUST; continue;
3217 case '+': flags |= F_SIGN; continue;
3218 case ' ': flags |= F_BLANK; continue;
3219 case '#': flags |= F_ALT; continue;
3220 case '0': flags |= F_ZERO; continue;
3221 }
3222 break;
3223 }
3224 if (c == '*') {
3225 v = getnextarg(args, arglen, &argidx);
3226 if (v == NULL)
3227 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003228 if (!PyInt_Check(v)) {
3229 PyErr_SetString(PyExc_TypeError,
3230 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003231 goto error;
3232 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003233 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003234 if (width < 0) {
3235 flags |= F_LJUST;
3236 width = -width;
3237 }
Guido van Rossume5372401993-03-16 12:15:04 +00003238 if (--fmtcnt >= 0)
3239 c = *fmt++;
3240 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003241 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003242 width = c - '0';
3243 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003244 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003245 if (!isdigit(c))
3246 break;
3247 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003248 PyErr_SetString(
3249 PyExc_ValueError,
3250 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003251 goto error;
3252 }
3253 width = width*10 + (c - '0');
3254 }
3255 }
3256 if (c == '.') {
3257 prec = 0;
3258 if (--fmtcnt >= 0)
3259 c = *fmt++;
3260 if (c == '*') {
3261 v = getnextarg(args, arglen, &argidx);
3262 if (v == NULL)
3263 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003264 if (!PyInt_Check(v)) {
3265 PyErr_SetString(
3266 PyExc_TypeError,
3267 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003268 goto error;
3269 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003270 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003271 if (prec < 0)
3272 prec = 0;
3273 if (--fmtcnt >= 0)
3274 c = *fmt++;
3275 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003276 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003277 prec = c - '0';
3278 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003279 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003280 if (!isdigit(c))
3281 break;
3282 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003283 PyErr_SetString(
3284 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003285 "prec too big");
3286 goto error;
3287 }
3288 prec = prec*10 + (c - '0');
3289 }
3290 }
3291 } /* prec */
3292 if (fmtcnt >= 0) {
3293 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003294 if (--fmtcnt >= 0)
3295 c = *fmt++;
3296 }
3297 }
3298 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003299 PyErr_SetString(PyExc_ValueError,
3300 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003301 goto error;
3302 }
3303 if (c != '%') {
3304 v = getnextarg(args, arglen, &argidx);
3305 if (v == NULL)
3306 goto error;
3307 }
3308 sign = 0;
3309 fill = ' ';
3310 switch (c) {
3311 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003312 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003313 len = 1;
3314 break;
3315 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003316 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003317#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003318 if (PyUnicode_Check(v)) {
3319 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003320 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003321 goto unicode;
3322 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003323#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003324 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003325 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003326 else
3327 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003328 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003329 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003330 if (!PyString_Check(temp)) {
3331 PyErr_SetString(PyExc_TypeError,
3332 "%s argument has non-string str()");
3333 goto error;
3334 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003335 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003336 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003337 if (prec >= 0 && len > prec)
3338 len = prec;
3339 break;
3340 case 'i':
3341 case 'd':
3342 case 'u':
3343 case 'o':
3344 case 'x':
3345 case 'X':
3346 if (c == 'i')
3347 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003348 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003349 temp = _PyString_FormatLong(v, flags,
3350 prec, c, &pbuf, &len);
3351 if (!temp)
3352 goto error;
3353 /* unbounded ints can always produce
3354 a sign character! */
3355 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003356 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003357 else {
3358 pbuf = formatbuf;
3359 len = formatint(pbuf, sizeof(formatbuf),
3360 flags, prec, c, v);
3361 if (len < 0)
3362 goto error;
3363 /* only d conversion is signed */
3364 sign = c == 'd';
3365 }
3366 if (flags & F_ZERO)
3367 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003368 break;
3369 case 'e':
3370 case 'E':
3371 case 'f':
3372 case 'g':
3373 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003374 pbuf = formatbuf;
3375 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003376 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003377 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003378 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003379 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003380 fill = '0';
3381 break;
3382 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003383 pbuf = formatbuf;
3384 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003385 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003386 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003387 break;
3388 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003389 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003390 "unsupported format character '%c' (0x%x) "
3391 "at index %i",
3392 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003393 goto error;
3394 }
3395 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003396 if (*pbuf == '-' || *pbuf == '+') {
3397 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003398 len--;
3399 }
3400 else if (flags & F_SIGN)
3401 sign = '+';
3402 else if (flags & F_BLANK)
3403 sign = ' ';
3404 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003405 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003406 }
3407 if (width < len)
3408 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003409 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003410 reslen -= rescnt;
3411 rescnt = width + fmtcnt + 100;
3412 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003413 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003414 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003415 res = PyString_AsString(result)
3416 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003417 }
3418 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003419 if (fill != ' ')
3420 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003421 rescnt--;
3422 if (width > len)
3423 width--;
3424 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003425 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3426 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003427 assert(pbuf[1] == c);
3428 if (fill != ' ') {
3429 *res++ = *pbuf++;
3430 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003431 }
Tim Petersfff53252001-04-12 18:38:48 +00003432 rescnt -= 2;
3433 width -= 2;
3434 if (width < 0)
3435 width = 0;
3436 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003437 }
3438 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003439 do {
3440 --rescnt;
3441 *res++ = fill;
3442 } while (--width > len);
3443 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003444 if (fill == ' ') {
3445 if (sign)
3446 *res++ = sign;
3447 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003448 (c == 'x' || c == 'X')) {
3449 assert(pbuf[0] == '0');
3450 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003451 *res++ = *pbuf++;
3452 *res++ = *pbuf++;
3453 }
3454 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003455 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003456 res += len;
3457 rescnt -= len;
3458 while (--width >= len) {
3459 --rescnt;
3460 *res++ = ' ';
3461 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003462 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003463 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003464 "not all arguments converted");
3465 goto error;
3466 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003467 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003468 } /* '%' */
3469 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003470 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003471 PyErr_SetString(PyExc_TypeError,
3472 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003473 goto error;
3474 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003475 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003476 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003477 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003478 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003479 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003480
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003481#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003482 unicode:
3483 if (args_owned) {
3484 Py_DECREF(args);
3485 args_owned = 0;
3486 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003487 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003488 if (PyTuple_Check(orig_args) && argidx > 0) {
3489 PyObject *v;
3490 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3491 v = PyTuple_New(n);
3492 if (v == NULL)
3493 goto error;
3494 while (--n >= 0) {
3495 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3496 Py_INCREF(w);
3497 PyTuple_SET_ITEM(v, n, w);
3498 }
3499 args = v;
3500 } else {
3501 Py_INCREF(orig_args);
3502 args = orig_args;
3503 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003504 args_owned = 1;
3505 /* Take what we have of the result and let the Unicode formatting
3506 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003507 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003508 if (_PyString_Resize(&result, rescnt))
3509 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003510 fmtcnt = PyString_GET_SIZE(format) - \
3511 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003512 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3513 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003514 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003515 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003516 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003517 if (v == NULL)
3518 goto error;
3519 /* Paste what we have (result) to what the Unicode formatting
3520 function returned (v) and return the result (or error) */
3521 w = PyUnicode_Concat(result, v);
3522 Py_DECREF(result);
3523 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003524 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003525 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003526#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003527
Guido van Rossume5372401993-03-16 12:15:04 +00003528 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003529 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003530 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003531 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003532 }
Guido van Rossume5372401993-03-16 12:15:04 +00003533 return NULL;
3534}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003535
3536
3537#ifdef INTERN_STRINGS
3538
/* This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003551static PyObject *interned;
3552
3553void
Fred Drakeba096332000-07-09 07:04:36 +00003554PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003555{
3556 register PyStringObject *s = (PyStringObject *)(*p);
3557 PyObject *t;
3558 if (s == NULL || !PyString_Check(s))
3559 Py_FatalError("PyString_InternInPlace: strings only please!");
3560 if ((t = s->ob_sinterned) != NULL) {
3561 if (t == (PyObject *)s)
3562 return;
3563 Py_INCREF(t);
3564 *p = t;
3565 Py_DECREF(s);
3566 return;
3567 }
3568 if (interned == NULL) {
3569 interned = PyDict_New();
3570 if (interned == NULL)
3571 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003572 }
3573 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3574 Py_INCREF(t);
3575 *p = s->ob_sinterned = t;
3576 Py_DECREF(s);
3577 return;
3578 }
3579 t = (PyObject *)s;
3580 if (PyDict_SetItem(interned, t, t) == 0) {
3581 s->ob_sinterned = t;
3582 return;
3583 }
3584 PyErr_Clear();
3585}
3586
3587
3588PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003589PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003590{
3591 PyObject *s = PyString_FromString(cp);
3592 if (s == NULL)
3593 return NULL;
3594 PyString_InternInPlace(&s);
3595 return s;
3596}
3597
3598#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003599
3600void
Fred Drakeba096332000-07-09 07:04:36 +00003601PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003602{
3603 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003604 for (i = 0; i < UCHAR_MAX + 1; i++) {
3605 Py_XDECREF(characters[i]);
3606 characters[i] = NULL;
3607 }
3608#ifndef DONT_SHARE_SHORT_STRINGS
3609 Py_XDECREF(nullstring);
3610 nullstring = NULL;
3611#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003612#ifdef INTERN_STRINGS
3613 if (interned) {
3614 int pos, changed;
3615 PyObject *key, *value;
3616 do {
3617 changed = 0;
3618 pos = 0;
3619 while (PyDict_Next(interned, &pos, &key, &value)) {
3620 if (key->ob_refcnt == 2 && key == value) {
3621 PyDict_DelItem(interned, key);
3622 changed = 1;
3623 }
3624 }
3625 } while (changed);
3626 }
3627#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003628}
Barry Warsawa903ad982001-02-23 16:40:48 +00003629
3630#ifdef INTERN_STRINGS
3631void _Py_ReleaseInternedStrings(void)
3632{
3633 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003634 fprintf(stderr, "releasing interned strings\n");
3635 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003636 Py_DECREF(interned);
3637 interned = NULL;
3638 }
3639}
3640#endif /* INTERN_STRINGS */