blob: b88778ea1584dc45aae9285722d50e0d32322221 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
159 count = vargs;
160#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000161 /* step 1: figure out how large a buffer we need */
162 for (f = format; *f; f++) {
163 if (*f == '%') {
164 const char* p = f;
165 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
166 ;
167
168 /* skip the 'l' in %ld, since it doesn't change the
169 width. although only %d is supported (see
170 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000171 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000172 if (*f == 'l' && *(f+1) == 'd')
173 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000174
Barry Warsawdadace02001-08-24 18:32:06 +0000175 switch (*f) {
176 case 'c':
177 (void)va_arg(count, int);
178 /* fall through... */
179 case '%':
180 n++;
181 break;
182 case 'd': case 'i': case 'x':
183 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000184 /* 20 bytes is enough to hold a 64-bit
185 integer. Decimal takes the most space.
186 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 n += 20;
188 break;
189 case 's':
190 s = va_arg(count, char*);
191 n += strlen(s);
192 break;
193 case 'p':
194 (void) va_arg(count, int);
195 /* maximum 64-bit pointer representation:
196 * 0xffffffffffffffff
197 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000198 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000199 */
200 n += 19;
201 break;
202 default:
203 /* if we stumble upon an unknown
204 formatting code, copy the rest of
205 the format string to the output
206 string. (we cannot just skip the
207 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000208 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000209 n += strlen(p);
210 goto expand;
211 }
212 } else
213 n++;
214 }
215 expand:
216 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000217 /* Since we've analyzed how much space we need for the worst case,
218 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 string = PyString_FromStringAndSize(NULL, n);
220 if (!string)
221 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000222
Barry Warsawdadace02001-08-24 18:32:06 +0000223 s = PyString_AsString(string);
224
225 for (f = format; *f; f++) {
226 if (*f == '%') {
227 const char* p = f++;
228 int i, longflag = 0;
229 /* parse the width.precision part (we're only
230 interested in the precision value, if any) */
231 n = 0;
232 while (isdigit(Py_CHARMASK(*f)))
233 n = (n*10) + *f++ - '0';
234 if (*f == '.') {
235 f++;
236 n = 0;
237 while (isdigit(Py_CHARMASK(*f)))
238 n = (n*10) + *f++ - '0';
239 }
240 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
241 f++;
242 /* handle the long flag, but only for %ld. others
243 can be added when necessary. */
244 if (*f == 'l' && *(f+1) == 'd') {
245 longflag = 1;
246 ++f;
247 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000248
Barry Warsawdadace02001-08-24 18:32:06 +0000249 switch (*f) {
250 case 'c':
251 *s++ = va_arg(vargs, int);
252 break;
253 case 'd':
254 if (longflag)
255 sprintf(s, "%ld", va_arg(vargs, long));
256 else
257 sprintf(s, "%d", va_arg(vargs, int));
258 s += strlen(s);
259 break;
260 case 'i':
261 sprintf(s, "%i", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'x':
265 sprintf(s, "%x", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 's':
269 p = va_arg(vargs, char*);
270 i = strlen(p);
271 if (n > 0 && i > n)
272 i = n;
273 memcpy(s, p, i);
274 s += i;
275 break;
276 case 'p':
277 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000278 /* %p is ill-defined: ensure leading 0x. */
279 if (s[1] == 'X')
280 s[1] = 'x';
281 else if (s[1] != 'x') {
282 memmove(s+2, s, strlen(s)+1);
283 s[0] = '0';
284 s[1] = 'x';
285 }
Barry Warsawdadace02001-08-24 18:32:06 +0000286 s += strlen(s);
287 break;
288 case '%':
289 *s++ = '%';
290 break;
291 default:
292 strcpy(s, p);
293 s += strlen(s);
294 goto end;
295 }
296 } else
297 *s++ = *f;
298 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000299
Barry Warsawdadace02001-08-24 18:32:06 +0000300 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000301 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000302 return string;
303}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000304
Barry Warsawdadace02001-08-24 18:32:06 +0000305PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000306PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000307{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000308 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000309 va_list vargs;
310
311#ifdef HAVE_STDARG_PROTOTYPES
312 va_start(vargs, format);
313#else
314 va_start(vargs);
315#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 ret = PyString_FromFormatV(format, vargs);
317 va_end(vargs);
318 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319}
320
321
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000322PyObject *PyString_Decode(const char *s,
323 int size,
324 const char *encoding,
325 const char *errors)
326{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000327 PyObject *v, *str;
328
329 str = PyString_FromStringAndSize(s, size);
330 if (str == NULL)
331 return NULL;
332 v = PyString_AsDecodedString(str, encoding, errors);
333 Py_DECREF(str);
334 return v;
335}
336
337PyObject *PyString_AsDecodedObject(PyObject *str,
338 const char *encoding,
339 const char *errors)
340{
341 PyObject *v;
342
343 if (!PyString_Check(str)) {
344 PyErr_BadArgument();
345 goto onError;
346 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000347
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000348 if (encoding == NULL) {
349#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000351#else
352 PyErr_SetString(PyExc_ValueError, "no encoding specified");
353 goto onError;
354#endif
355 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356
357 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000358 v = PyCodec_Decode(str, encoding, errors);
359 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361
362 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000363
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 return NULL;
366}
367
368PyObject *PyString_AsDecodedString(PyObject *str,
369 const char *encoding,
370 const char *errors)
371{
372 PyObject *v;
373
374 v = PyString_AsDecodedObject(str, encoding, errors);
375 if (v == NULL)
376 goto onError;
377
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000378#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000379 /* Convert Unicode to a string using the default encoding */
380 if (PyUnicode_Check(v)) {
381 PyObject *temp = v;
382 v = PyUnicode_AsEncodedString(v, NULL, NULL);
383 Py_DECREF(temp);
384 if (v == NULL)
385 goto onError;
386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 if (!PyString_Check(v)) {
389 PyErr_Format(PyExc_TypeError,
390 "decoder did not return a string object (type=%.400s)",
391 v->ob_type->tp_name);
392 Py_DECREF(v);
393 goto onError;
394 }
395
396 return v;
397
398 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 return NULL;
400}
401
402PyObject *PyString_Encode(const char *s,
403 int size,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 str = PyString_FromStringAndSize(s, size);
410 if (str == NULL)
411 return NULL;
412 v = PyString_AsEncodedString(str, encoding, errors);
413 Py_DECREF(str);
414 return v;
415}
416
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 const char *encoding,
419 const char *errors)
420{
421 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000422
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000423 if (!PyString_Check(str)) {
424 PyErr_BadArgument();
425 goto onError;
426 }
427
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000428 if (encoding == NULL) {
429#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000431#else
432 PyErr_SetString(PyExc_ValueError, "no encoding specified");
433 goto onError;
434#endif
435 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436
437 /* Encode via the codec registry */
438 v = PyCodec_Encode(str, encoding, errors);
439 if (v == NULL)
440 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000441
442 return v;
443
444 onError:
445 return NULL;
446}
447
448PyObject *PyString_AsEncodedString(PyObject *str,
449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v;
453
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000454 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000455 if (v == NULL)
456 goto onError;
457
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000458#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 /* Convert Unicode to a string using the default encoding */
460 if (PyUnicode_Check(v)) {
461 PyObject *temp = v;
462 v = PyUnicode_AsEncodedString(v, NULL, NULL);
463 Py_DECREF(temp);
464 if (v == NULL)
465 goto onError;
466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(v)) {
469 PyErr_Format(PyExc_TypeError,
470 "encoder did not return a string object (type=%.400s)",
471 v->ob_type->tp_name);
472 Py_DECREF(v);
473 goto onError;
474 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000476 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000477
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 onError:
479 return NULL;
480}
481
Guido van Rossum234f9421993-06-17 12:35:49 +0000482static void
Fred Drakeba096332000-07-09 07:04:36 +0000483string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000484{
Guido van Rossum9475a232001-10-05 20:51:39 +0000485 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000486}
487
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000488static int
489string_getsize(register PyObject *op)
490{
491 char *s;
492 int len;
493 if (PyString_AsStringAndSize(op, &s, &len))
494 return -1;
495 return len;
496}
497
498static /*const*/ char *
499string_getbuffer(register PyObject *op)
500{
501 char *s;
502 int len;
503 if (PyString_AsStringAndSize(op, &s, &len))
504 return NULL;
505 return s;
506}
507
Guido van Rossumd7047b31995-01-02 19:07:15 +0000508int
Fred Drakeba096332000-07-09 07:04:36 +0000509PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000511 if (!PyString_Check(op))
512 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
516/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000517PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (!PyString_Check(op))
520 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522}
523
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000524int
525PyString_AsStringAndSize(register PyObject *obj,
526 register char **s,
527 register int *len)
528{
529 if (s == NULL) {
530 PyErr_BadInternalCall();
531 return -1;
532 }
533
534 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000536 if (PyUnicode_Check(obj)) {
537 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
538 if (obj == NULL)
539 return -1;
540 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000541 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000542#endif
543 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000544 PyErr_Format(PyExc_TypeError,
545 "expected string or Unicode object, "
546 "%.200s found", obj->ob_type->tp_name);
547 return -1;
548 }
549 }
550
551 *s = PyString_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyString_GET_SIZE(obj);
554 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected string without null bytes");
557 return -1;
558 }
559 return 0;
560}
561
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562/* Methods */
563
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564static int
Fred Drakeba096332000-07-09 07:04:36 +0000565string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566{
567 int i;
568 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000570
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000571 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000572 if (! PyString_CheckExact(op)) {
573 int ret;
574 /* A str subclass may have its own __str__ method. */
575 op = (PyStringObject *) PyObject_Str((PyObject *)op);
576 if (op == NULL)
577 return -1;
578 ret = string_print(op, fp, flags);
579 Py_DECREF(op);
580 return ret;
581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000582 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000584 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000586
Thomas Wouters7e474022000-07-16 12:04:32 +0000587 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000589 if (memchr(op->ob_sval, '\'', op->ob_size) &&
590 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 quote = '"';
592
593 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594 for (i = 0; i < op->ob_size; i++) {
595 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000596 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000598 else if (c == '\t')
599 fprintf(fp, "\\t");
600 else if (c == '\n')
601 fprintf(fp, "\\n");
602 else if (c == '\r')
603 fprintf(fp, "\\r");
604 else if (c < ' ' || c >= 0x7f)
605 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000606 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000607 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000609 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000610 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000611}
612
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000613static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000614string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000616 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
617 PyObject *v;
618 if (newsize > INT_MAX) {
619 PyErr_SetString(PyExc_OverflowError,
620 "string is too large to make repr");
621 }
622 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000624 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000625 }
626 else {
627 register int i;
628 register char c;
629 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000630 int quote;
631
Thomas Wouters7e474022000-07-16 12:04:32 +0000632 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000633 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000634 if (memchr(op->ob_sval, '\'', op->ob_size) &&
635 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000636 quote = '"';
637
Tim Peters9161c8b2001-12-03 01:55:38 +0000638 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000639 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000641 /* There's at least enough room for a hex escape
642 and a closing quote. */
643 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000645 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000647 else if (c == '\t')
648 *p++ = '\\', *p++ = 't';
649 else if (c == '\n')
650 *p++ = '\\', *p++ = 'n';
651 else if (c == '\r')
652 *p++ = '\\', *p++ = 'r';
653 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000654 /* For performance, we don't want to call
655 PyOS_snprintf here (extra layers of
656 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000657 sprintf(p, "\\x%02x", c & 0xff);
658 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000659 }
660 else
661 *p++ = c;
662 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000663 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000664 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000666 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000667 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000668 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670}
671
Guido van Rossum189f1df2001-05-01 16:51:53 +0000672static PyObject *
673string_str(PyObject *s)
674{
Tim Petersc9933152001-10-16 20:18:24 +0000675 assert(PyString_Check(s));
676 if (PyString_CheckExact(s)) {
677 Py_INCREF(s);
678 return s;
679 }
680 else {
681 /* Subtype -- return genuine string with the same value. */
682 PyStringObject *t = (PyStringObject *) s;
683 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
684 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000685}
686
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687static int
Fred Drakeba096332000-07-09 07:04:36 +0000688string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
690 return a->ob_size;
691}
692
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000694string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
696 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697 register PyStringObject *op;
698 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000699#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (PyUnicode_Check(bb))
701 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000702#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000703 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000704 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000705 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706 return NULL;
707 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000708#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000710 if ((a->ob_size == 0 || b->ob_size == 0) &&
711 PyString_CheckExact(a) && PyString_CheckExact(b)) {
712 if (a->ob_size == 0) {
713 Py_INCREF(bb);
714 return bb;
715 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716 Py_INCREF(a);
717 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718 }
719 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000720 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000722 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000723 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000724 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000725 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000726 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000727 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000728 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
729 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
730 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000731 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732#undef b
733}
734
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000736string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737{
738 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000739 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000740 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000741 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742 if (n < 0)
743 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000744 /* watch out for overflows: the size can overflow int,
745 * and the # of bytes needed can overflow size_t
746 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000748 if (n && size / n != a->ob_size) {
749 PyErr_SetString(PyExc_OverflowError,
750 "repeated string is too long");
751 return NULL;
752 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000753 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000754 Py_INCREF(a);
755 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756 }
Tim Peters8f422462000-09-09 06:13:41 +0000757 nbytes = size * sizeof(char);
758 if (nbytes / sizeof(char) != (size_t)size ||
759 nbytes + sizeof(PyStringObject) <= nbytes) {
760 PyErr_SetString(PyExc_OverflowError,
761 "repeated string is too long");
762 return NULL;
763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000764 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000765 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000766 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000768 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000769 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000770 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000771 for (i = 0; i < size; i += a->ob_size)
772 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
773 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000774 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000775}
776
777/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
778
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000779static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000780string_slice(register PyStringObject *a, register int i, register int j)
781 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782{
783 if (i < 0)
784 i = 0;
785 if (j < 0)
786 j = 0; /* Avoid signed/unsigned bug in next line */
787 if (j > a->ob_size)
788 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000789 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
790 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000791 Py_INCREF(a);
792 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
794 if (j < i)
795 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000796 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797}
798
Guido van Rossum9284a572000-03-07 15:53:43 +0000799static int
Fred Drakeba096332000-07-09 07:04:36 +0000800string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000801{
802 register char *s, *end;
803 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000804#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000805 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000806 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000807#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000808 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000809 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000810 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000811 return -1;
812 }
813 c = PyString_AsString(el)[0];
814 s = PyString_AsString(a);
815 end = s + PyString_Size(a);
816 while (s < end) {
817 if (c == *s++)
818 return 1;
819 }
820 return 0;
821}
822
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000824string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000826 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000827 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000829 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 return NULL;
831 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000832 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000833 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000834 if (v == NULL)
835 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000836 else {
837#ifdef COUNT_ALLOCS
838 one_strings++;
839#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000840 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000841 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000842 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843}
844
Martin v. Löwiscd353062001-05-24 16:56:35 +0000845static PyObject*
846string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000848 int c;
849 int len_a, len_b;
850 int min_len;
851 PyObject *result;
852
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000853 /* Make sure both arguments are strings. */
854 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000855 result = Py_NotImplemented;
856 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000857 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000858 if (a == b) {
859 switch (op) {
860 case Py_EQ:case Py_LE:case Py_GE:
861 result = Py_True;
862 goto out;
863 case Py_NE:case Py_LT:case Py_GT:
864 result = Py_False;
865 goto out;
866 }
867 }
868 if (op == Py_EQ) {
869 /* Supporting Py_NE here as well does not save
870 much time, since Py_NE is rarely used. */
871 if (a->ob_size == b->ob_size
872 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000873 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +0000874 a->ob_size) == 0)) {
875 result = Py_True;
876 } else {
877 result = Py_False;
878 }
879 goto out;
880 }
881 len_a = a->ob_size; len_b = b->ob_size;
882 min_len = (len_a < len_b) ? len_a : len_b;
883 if (min_len > 0) {
884 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
885 if (c==0)
886 c = memcmp(a->ob_sval, b->ob_sval, min_len);
887 }else
888 c = 0;
889 if (c == 0)
890 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
891 switch (op) {
892 case Py_LT: c = c < 0; break;
893 case Py_LE: c = c <= 0; break;
894 case Py_EQ: assert(0); break; /* unreachable */
895 case Py_NE: c = c != 0; break;
896 case Py_GT: c = c > 0; break;
897 case Py_GE: c = c >= 0; break;
898 default:
899 result = Py_NotImplemented;
900 goto out;
901 }
902 result = c ? Py_True : Py_False;
903 out:
904 Py_INCREF(result);
905 return result;
906}
907
908int
909_PyString_Eq(PyObject *o1, PyObject *o2)
910{
911 PyStringObject *a, *b;
912 a = (PyStringObject*)o1;
913 b = (PyStringObject*)o2;
914 return a->ob_size == b->ob_size
915 && *a->ob_sval == *b->ob_sval
916 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917}
918
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919static long
Fred Drakeba096332000-07-09 07:04:36 +0000920string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000921{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000922 register int len;
923 register unsigned char *p;
924 register long x;
925
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000926 if (a->ob_shash != -1)
927 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000928 if (a->ob_sinterned != NULL)
929 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000931 len = a->ob_size;
932 p = (unsigned char *) a->ob_sval;
933 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000935 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000936 x ^= a->ob_size;
937 if (x == -1)
938 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000939 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000940 return x;
941}
942
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +0000943static PyObject*
944string_subscript(PyStringObject* self, PyObject* item)
945{
946 if (PyInt_Check(item)) {
947 long i = PyInt_AS_LONG(item);
948 if (i < 0)
949 i += PyString_GET_SIZE(self);
950 return string_item(self,i);
951 }
952 else if (PyLong_Check(item)) {
953 long i = PyLong_AsLong(item);
954 if (i == -1 && PyErr_Occurred())
955 return NULL;
956 if (i < 0)
957 i += PyString_GET_SIZE(self);
958 return string_item(self,i);
959 }
960 else if (PySlice_Check(item)) {
961 int start, stop, step, slicelength, cur, i;
962 char* source_buf;
963 char* result_buf;
964 PyObject* result;
965
966 if (PySlice_GetIndicesEx((PySliceObject*)item,
967 PyString_GET_SIZE(self),
968 &start, &stop, &step, &slicelength) < 0) {
969 return NULL;
970 }
971
972 if (slicelength <= 0) {
973 return PyString_FromStringAndSize("", 0);
974 }
975 else {
976 source_buf = PyString_AsString((PyObject*)self);
977 result_buf = PyMem_Malloc(slicelength);
978
979 for (cur = start, i = 0; i < slicelength;
980 cur += step, i++) {
981 result_buf[i] = source_buf[cur];
982 }
983
984 result = PyString_FromStringAndSize(result_buf,
985 slicelength);
986 PyMem_Free(result_buf);
987 return result;
988 }
989 }
990 else {
991 PyErr_SetString(PyExc_TypeError,
992 "string indices must be integers");
993 return NULL;
994 }
995}
996
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000997static int
Fred Drakeba096332000-07-09 07:04:36 +0000998string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000999{
1000 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +00001001 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +00001002 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001003 return -1;
1004 }
1005 *ptr = (void *)self->ob_sval;
1006 return self->ob_size;
1007}
1008
1009static int
Fred Drakeba096332000-07-09 07:04:36 +00001010string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001011{
Guido van Rossum045e6881997-09-08 18:30:11 +00001012 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +00001013 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001014 return -1;
1015}
1016
1017static int
Fred Drakeba096332000-07-09 07:04:36 +00001018string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001019{
1020 if ( lenp )
1021 *lenp = self->ob_size;
1022 return 1;
1023}
1024
Guido van Rossum1db70701998-10-08 02:18:52 +00001025static int
Fred Drakeba096332000-07-09 07:04:36 +00001026string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +00001027{
1028 if ( index != 0 ) {
1029 PyErr_SetString(PyExc_SystemError,
1030 "accessing non-existent string segment");
1031 return -1;
1032 }
1033 *ptr = self->ob_sval;
1034 return self->ob_size;
1035}
1036
Guido van Rossumc0b618a1997-05-02 03:12:38 +00001037static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +00001038 (inquiry)string_length, /*sq_length*/
1039 (binaryfunc)string_concat, /*sq_concat*/
1040 (intargfunc)string_repeat, /*sq_repeat*/
1041 (intargfunc)string_item, /*sq_item*/
1042 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +00001043 0, /*sq_ass_item*/
1044 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +00001045 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001046};
1047
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00001048static PyMappingMethods string_as_mapping = {
1049 (inquiry)string_length,
1050 (binaryfunc)string_subscript,
1051 0,
1052};
1053
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001054static PyBufferProcs string_as_buffer = {
1055 (getreadbufferproc)string_buffer_getreadbuf,
1056 (getwritebufferproc)string_buffer_getwritebuf,
1057 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +00001058 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00001059};
1060
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001061
1062
/* Which end(s) of the string a strip operation works on.  The values
   double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip type i: the format string minus its leading
   three characters ("|O:"). */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001071
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001072
1073static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001074split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001075{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001076 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001077 PyObject* item;
1078 PyObject *list = PyList_New(0);
1079
1080 if (list == NULL)
1081 return NULL;
1082
Guido van Rossum4c08d552000-03-10 22:55:18 +00001083 for (i = j = 0; i < len; ) {
1084 while (i < len && isspace(Py_CHARMASK(s[i])))
1085 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001086 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087 while (i < len && !isspace(Py_CHARMASK(s[i])))
1088 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001089 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001090 if (maxsplit-- <= 0)
1091 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001092 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1093 if (item == NULL)
1094 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001095 err = PyList_Append(list, item);
1096 Py_DECREF(item);
1097 if (err < 0)
1098 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001099 while (i < len && isspace(Py_CHARMASK(s[i])))
1100 i++;
1101 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001102 }
1103 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001104 if (j < len) {
1105 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1106 if (item == NULL)
1107 goto finally;
1108 err = PyList_Append(list, item);
1109 Py_DECREF(item);
1110 if (err < 0)
1111 goto finally;
1112 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113 return list;
1114 finally:
1115 Py_DECREF(list);
1116 return NULL;
1117}
1118
1119
1120static char split__doc__[] =
1121"S.split([sep [,maxsplit]]) -> list of strings\n\
1122\n\
1123Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001124delimiter string. If maxsplit is given, at most maxsplit\n\
1125splits are done. If sep is not specified, any whitespace string\n\
1126is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001127
1128static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001129string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001130{
1131 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001132 int maxsplit = -1;
1133 const char *s = PyString_AS_STRING(self), *sub;
1134 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001135
Guido van Rossum4c08d552000-03-10 22:55:18 +00001136 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001137 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001138 if (maxsplit < 0)
1139 maxsplit = INT_MAX;
1140 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001141 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001142 if (PyString_Check(subobj)) {
1143 sub = PyString_AS_STRING(subobj);
1144 n = PyString_GET_SIZE(subobj);
1145 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001146#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001147 else if (PyUnicode_Check(subobj))
1148 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001149#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001150 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1151 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152 if (n == 0) {
1153 PyErr_SetString(PyExc_ValueError, "empty separator");
1154 return NULL;
1155 }
1156
1157 list = PyList_New(0);
1158 if (list == NULL)
1159 return NULL;
1160
1161 i = j = 0;
1162 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001163 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001164 if (maxsplit-- <= 0)
1165 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001166 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1167 if (item == NULL)
1168 goto fail;
1169 err = PyList_Append(list, item);
1170 Py_DECREF(item);
1171 if (err < 0)
1172 goto fail;
1173 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001174 }
1175 else
1176 i++;
1177 }
1178 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1179 if (item == NULL)
1180 goto fail;
1181 err = PyList_Append(list, item);
1182 Py_DECREF(item);
1183 if (err < 0)
1184 goto fail;
1185
1186 return list;
1187
1188 fail:
1189 Py_DECREF(list);
1190 return NULL;
1191}
1192
1193
1194static char join__doc__[] =
1195"S.join(sequence) -> string\n\
1196\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001197Return a string which is the concatenation of the strings in the\n\
1198sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001199
1200static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001201string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001202{
1203 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001204 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001205 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001206 char *p;
1207 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001208 size_t sz = 0;
1209 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001210 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001211
Tim Peters19fe14e2001-01-19 03:03:47 +00001212 seq = PySequence_Fast(orig, "");
1213 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001214 if (PyErr_ExceptionMatches(PyExc_TypeError))
1215 PyErr_Format(PyExc_TypeError,
1216 "sequence expected, %.80s found",
1217 orig->ob_type->tp_name);
1218 return NULL;
1219 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001220
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001221 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001222 if (seqlen == 0) {
1223 Py_DECREF(seq);
1224 return PyString_FromString("");
1225 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001226 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001227 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001228 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1229 PyErr_Format(PyExc_TypeError,
1230 "sequence item 0: expected string,"
1231 " %.80s found",
1232 item->ob_type->tp_name);
1233 Py_DECREF(seq);
1234 return NULL;
1235 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001236 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001237 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001238 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001239 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001240
Tim Peters19fe14e2001-01-19 03:03:47 +00001241 /* There are at least two things to join. Do a pre-pass to figure out
1242 * the total amount of space we'll need (sz), see whether any argument
1243 * is absurd, and defer to the Unicode join if appropriate.
1244 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001245 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001246 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001247 item = PySequence_Fast_GET_ITEM(seq, i);
1248 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001249#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001250 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001251 /* Defer to Unicode join.
1252 * CAUTION: There's no gurantee that the
1253 * original sequence can be iterated over
1254 * again, so we must pass seq here.
1255 */
1256 PyObject *result;
1257 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001258 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001259 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001260 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001261#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001262 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001263 "sequence item %i: expected string,"
1264 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001265 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001266 Py_DECREF(seq);
1267 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001268 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001269 sz += PyString_GET_SIZE(item);
1270 if (i != 0)
1271 sz += seplen;
1272 if (sz < old_sz || sz > INT_MAX) {
1273 PyErr_SetString(PyExc_OverflowError,
1274 "join() is too long for a Python string");
1275 Py_DECREF(seq);
1276 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001277 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001278 }
1279
1280 /* Allocate result space. */
1281 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1282 if (res == NULL) {
1283 Py_DECREF(seq);
1284 return NULL;
1285 }
1286
1287 /* Catenate everything. */
1288 p = PyString_AS_STRING(res);
1289 for (i = 0; i < seqlen; ++i) {
1290 size_t n;
1291 item = PySequence_Fast_GET_ITEM(seq, i);
1292 n = PyString_GET_SIZE(item);
1293 memcpy(p, PyString_AS_STRING(item), n);
1294 p += n;
1295 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001296 memcpy(p, sep, seplen);
1297 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001298 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001299 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001300
Jeremy Hylton49048292000-07-11 03:28:17 +00001301 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001303}
1304
Tim Peters52e155e2001-06-16 05:42:57 +00001305PyObject *
1306_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001307{
Tim Petersa7259592001-06-16 05:11:17 +00001308 assert(sep != NULL && PyString_Check(sep));
1309 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001310 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001311}
1312
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001313static long
Fred Drakeba096332000-07-09 07:04:36 +00001314string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001315{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317 int len = PyString_GET_SIZE(self);
1318 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001319 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001320
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001321 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001322 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001323 return -2;
1324 if (PyString_Check(subobj)) {
1325 sub = PyString_AS_STRING(subobj);
1326 n = PyString_GET_SIZE(subobj);
1327 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001328#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001329 else if (PyUnicode_Check(subobj))
1330 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001331#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001333 return -2;
1334
1335 if (last > len)
1336 last = len;
1337 if (last < 0)
1338 last += len;
1339 if (last < 0)
1340 last = 0;
1341 if (i < 0)
1342 i += len;
1343 if (i < 0)
1344 i = 0;
1345
Guido van Rossum4c08d552000-03-10 22:55:18 +00001346 if (dir > 0) {
1347 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001348 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001349 last -= n;
1350 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001351 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001352 return (long)i;
1353 }
1354 else {
1355 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001356
Guido van Rossum4c08d552000-03-10 22:55:18 +00001357 if (n == 0 && i <= last)
1358 return (long)last;
1359 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001360 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001361 return (long)j;
1362 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001363
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364 return -1;
1365}
1366
1367
1368static char find__doc__[] =
1369"S.find(sub [,start [,end]]) -> int\n\
1370\n\
1371Return the lowest index in S where substring sub is found,\n\
1372such that sub is contained within s[start,end]. Optional\n\
1373arguments start and end are interpreted as in slice notation.\n\
1374\n\
1375Return -1 on failure.";
1376
1377static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001378string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001380 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001381 if (result == -2)
1382 return NULL;
1383 return PyInt_FromLong(result);
1384}
1385
1386
1387static char index__doc__[] =
1388"S.index(sub [,start [,end]]) -> int\n\
1389\n\
1390Like S.find() but raise ValueError when the substring is not found.";
1391
1392static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001393string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001394{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001395 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001396 if (result == -2)
1397 return NULL;
1398 if (result == -1) {
1399 PyErr_SetString(PyExc_ValueError,
1400 "substring not found in string.index");
1401 return NULL;
1402 }
1403 return PyInt_FromLong(result);
1404}
1405
1406
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407static char rfind__doc__[] =
1408"S.rfind(sub [,start [,end]]) -> int\n\
1409\n\
1410Return the highest index in S where substring sub is found,\n\
1411such that sub is contained within s[start,end]. Optional\n\
1412arguments start and end are interpreted as in slice notation.\n\
1413\n\
1414Return -1 on failure.";
1415
1416static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001417string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001418{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001419 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001420 if (result == -2)
1421 return NULL;
1422 return PyInt_FromLong(result);
1423}
1424
1425
1426static char rindex__doc__[] =
1427"S.rindex(sub [,start [,end]]) -> int\n\
1428\n\
1429Like S.rfind() but raise ValueError when the substring is not found.";
1430
1431static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001432string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001433{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001434 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001435 if (result == -2)
1436 return NULL;
1437 if (result == -1) {
1438 PyErr_SetString(PyExc_ValueError,
1439 "substring not found in string.rindex");
1440 return NULL;
1441 }
1442 return PyInt_FromLong(result);
1443}
1444
1445
1446static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001447do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1448{
1449 char *s = PyString_AS_STRING(self);
1450 int len = PyString_GET_SIZE(self);
1451 char *sep = PyString_AS_STRING(sepobj);
1452 int seplen = PyString_GET_SIZE(sepobj);
1453 int i, j;
1454
1455 i = 0;
1456 if (striptype != RIGHTSTRIP) {
1457 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1458 i++;
1459 }
1460 }
1461
1462 j = len;
1463 if (striptype != LEFTSTRIP) {
1464 do {
1465 j--;
1466 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1467 j++;
1468 }
1469
1470 if (i == 0 && j == len && PyString_CheckExact(self)) {
1471 Py_INCREF(self);
1472 return (PyObject*)self;
1473 }
1474 else
1475 return PyString_FromStringAndSize(s+i, j-i);
1476}
1477
1478
1479static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001480do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481{
1482 char *s = PyString_AS_STRING(self);
1483 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001485 i = 0;
1486 if (striptype != RIGHTSTRIP) {
1487 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1488 i++;
1489 }
1490 }
1491
1492 j = len;
1493 if (striptype != LEFTSTRIP) {
1494 do {
1495 j--;
1496 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1497 j++;
1498 }
1499
Tim Peters8fa5dd02001-09-12 02:18:30 +00001500 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001501 Py_INCREF(self);
1502 return (PyObject*)self;
1503 }
1504 else
1505 return PyString_FromStringAndSize(s+i, j-i);
1506}
1507
1508
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001509static PyObject *
1510do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1511{
1512 PyObject *sep = NULL;
1513
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001514 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001515 return NULL;
1516
1517 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001518 if (PyString_Check(sep))
1519 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001520#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001521 else if (PyUnicode_Check(sep)) {
1522 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1523 PyObject *res;
1524 if (uniself==NULL)
1525 return NULL;
1526 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1527 striptype, sep);
1528 Py_DECREF(uniself);
1529 return res;
1530 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001531#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001532 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001533 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001534#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001535 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001536#else
1537 "%s arg must be None or str",
1538#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001539 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001540 return NULL;
1541 }
1542 return do_xstrip(self, striptype, sep);
1543 }
1544
1545 return do_strip(self, striptype);
1546}
1547
1548
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001549static char strip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001550"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551\n\
1552Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001553whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001554If sep is given and not None, remove characters in sep instead.\n\
1555If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001556
1557static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001558string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001559{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001560 if (PyTuple_GET_SIZE(args) == 0)
1561 return do_strip(self, BOTHSTRIP); /* Common case */
1562 else
1563 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001564}
1565
1566
1567static char lstrip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001568"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001569\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001570Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001571If sep is given and not None, remove characters in sep instead.\n\
1572If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001573
1574static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001575string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001577 if (PyTuple_GET_SIZE(args) == 0)
1578 return do_strip(self, LEFTSTRIP); /* Common case */
1579 else
1580 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001581}
1582
1583
1584static char rstrip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001585"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001586\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001587Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001588If sep is given and not None, remove characters in sep instead.\n\
1589If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001590
1591static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001592string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001593{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001594 if (PyTuple_GET_SIZE(args) == 0)
1595 return do_strip(self, RIGHTSTRIP); /* Common case */
1596 else
1597 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001598}
1599
1600
1601static char lower__doc__[] =
1602"S.lower() -> string\n\
1603\n\
1604Return a copy of the string S converted to lowercase.";
1605
1606static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001607string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608{
1609 char *s = PyString_AS_STRING(self), *s_new;
1610 int i, n = PyString_GET_SIZE(self);
1611 PyObject *new;
1612
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001613 new = PyString_FromStringAndSize(NULL, n);
1614 if (new == NULL)
1615 return NULL;
1616 s_new = PyString_AsString(new);
1617 for (i = 0; i < n; i++) {
1618 int c = Py_CHARMASK(*s++);
1619 if (isupper(c)) {
1620 *s_new = tolower(c);
1621 } else
1622 *s_new = c;
1623 s_new++;
1624 }
1625 return new;
1626}
1627
1628
1629static char upper__doc__[] =
1630"S.upper() -> string\n\
1631\n\
1632Return a copy of the string S converted to uppercase.";
1633
1634static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001635string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001636{
1637 char *s = PyString_AS_STRING(self), *s_new;
1638 int i, n = PyString_GET_SIZE(self);
1639 PyObject *new;
1640
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001641 new = PyString_FromStringAndSize(NULL, n);
1642 if (new == NULL)
1643 return NULL;
1644 s_new = PyString_AsString(new);
1645 for (i = 0; i < n; i++) {
1646 int c = Py_CHARMASK(*s++);
1647 if (islower(c)) {
1648 *s_new = toupper(c);
1649 } else
1650 *s_new = c;
1651 s_new++;
1652 }
1653 return new;
1654}
1655
1656
Guido van Rossum4c08d552000-03-10 22:55:18 +00001657static char title__doc__[] =
1658"S.title() -> string\n\
1659\n\
1660Return a titlecased version of S, i.e. words start with uppercase\n\
1661characters, all remaining cased characters have lowercase.";
1662
1663static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001664string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001665{
1666 char *s = PyString_AS_STRING(self), *s_new;
1667 int i, n = PyString_GET_SIZE(self);
1668 int previous_is_cased = 0;
1669 PyObject *new;
1670
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671 new = PyString_FromStringAndSize(NULL, n);
1672 if (new == NULL)
1673 return NULL;
1674 s_new = PyString_AsString(new);
1675 for (i = 0; i < n; i++) {
1676 int c = Py_CHARMASK(*s++);
1677 if (islower(c)) {
1678 if (!previous_is_cased)
1679 c = toupper(c);
1680 previous_is_cased = 1;
1681 } else if (isupper(c)) {
1682 if (previous_is_cased)
1683 c = tolower(c);
1684 previous_is_cased = 1;
1685 } else
1686 previous_is_cased = 0;
1687 *s_new++ = c;
1688 }
1689 return new;
1690}
1691
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001692static char capitalize__doc__[] =
1693"S.capitalize() -> string\n\
1694\n\
1695Return a copy of the string S with only its first character\n\
1696capitalized.";
1697
1698static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001699string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700{
1701 char *s = PyString_AS_STRING(self), *s_new;
1702 int i, n = PyString_GET_SIZE(self);
1703 PyObject *new;
1704
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001705 new = PyString_FromStringAndSize(NULL, n);
1706 if (new == NULL)
1707 return NULL;
1708 s_new = PyString_AsString(new);
1709 if (0 < n) {
1710 int c = Py_CHARMASK(*s++);
1711 if (islower(c))
1712 *s_new = toupper(c);
1713 else
1714 *s_new = c;
1715 s_new++;
1716 }
1717 for (i = 1; i < n; i++) {
1718 int c = Py_CHARMASK(*s++);
1719 if (isupper(c))
1720 *s_new = tolower(c);
1721 else
1722 *s_new = c;
1723 s_new++;
1724 }
1725 return new;
1726}
1727
1728
1729static char count__doc__[] =
1730"S.count(sub[, start[, end]]) -> int\n\
1731\n\
1732Return the number of occurrences of substring sub in string\n\
1733S[start:end]. Optional arguments start and end are\n\
1734interpreted as in slice notation.";
1735
1736static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001737string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001738{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001739 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 int len = PyString_GET_SIZE(self), n;
1741 int i = 0, last = INT_MAX;
1742 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001743 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001744
Guido van Rossumc6821402000-05-08 14:08:05 +00001745 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1746 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001748
Guido van Rossum4c08d552000-03-10 22:55:18 +00001749 if (PyString_Check(subobj)) {
1750 sub = PyString_AS_STRING(subobj);
1751 n = PyString_GET_SIZE(subobj);
1752 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001753#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001754 else if (PyUnicode_Check(subobj)) {
1755 int count;
1756 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1757 if (count == -1)
1758 return NULL;
1759 else
1760 return PyInt_FromLong((long) count);
1761 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001762#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001763 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1764 return NULL;
1765
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766 if (last > len)
1767 last = len;
1768 if (last < 0)
1769 last += len;
1770 if (last < 0)
1771 last = 0;
1772 if (i < 0)
1773 i += len;
1774 if (i < 0)
1775 i = 0;
1776 m = last + 1 - n;
1777 if (n == 0)
1778 return PyInt_FromLong((long) (m-i));
1779
1780 r = 0;
1781 while (i < m) {
1782 if (!memcmp(s+i, sub, n)) {
1783 r++;
1784 i += n;
1785 } else {
1786 i++;
1787 }
1788 }
1789 return PyInt_FromLong((long) r);
1790}
1791
1792
1793static char swapcase__doc__[] =
1794"S.swapcase() -> string\n\
1795\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001796Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001797converted to lowercase and vice versa.";
1798
1799static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001800string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001801{
1802 char *s = PyString_AS_STRING(self), *s_new;
1803 int i, n = PyString_GET_SIZE(self);
1804 PyObject *new;
1805
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001806 new = PyString_FromStringAndSize(NULL, n);
1807 if (new == NULL)
1808 return NULL;
1809 s_new = PyString_AsString(new);
1810 for (i = 0; i < n; i++) {
1811 int c = Py_CHARMASK(*s++);
1812 if (islower(c)) {
1813 *s_new = toupper(c);
1814 }
1815 else if (isupper(c)) {
1816 *s_new = tolower(c);
1817 }
1818 else
1819 *s_new = c;
1820 s_new++;
1821 }
1822 return new;
1823}
1824
1825
1826static char translate__doc__[] =
1827"S.translate(table [,deletechars]) -> string\n\
1828\n\
1829Return a copy of the string S, where all characters occurring\n\
1830in the optional argument deletechars are removed, and the\n\
1831remaining characters have been mapped through the given\n\
1832translation table, which must be a string of length 256.";
1833
1834static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001835string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001837 register char *input, *output;
1838 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001839 register int i, c, changed = 0;
1840 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001841 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001842 int inlen, tablen, dellen = 0;
1843 PyObject *result;
1844 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001845 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846
Guido van Rossum4c08d552000-03-10 22:55:18 +00001847 if (!PyArg_ParseTuple(args, "O|O:translate",
1848 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001850
1851 if (PyString_Check(tableobj)) {
1852 table1 = PyString_AS_STRING(tableobj);
1853 tablen = PyString_GET_SIZE(tableobj);
1854 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001855#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001856 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001857 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001858 parameter; instead a mapping to None will cause characters
1859 to be deleted. */
1860 if (delobj != NULL) {
1861 PyErr_SetString(PyExc_TypeError,
1862 "deletions are implemented differently for unicode");
1863 return NULL;
1864 }
1865 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1866 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001867#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001869 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870
1871 if (delobj != NULL) {
1872 if (PyString_Check(delobj)) {
1873 del_table = PyString_AS_STRING(delobj);
1874 dellen = PyString_GET_SIZE(delobj);
1875 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001876#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001877 else if (PyUnicode_Check(delobj)) {
1878 PyErr_SetString(PyExc_TypeError,
1879 "deletions are implemented differently for unicode");
1880 return NULL;
1881 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001882#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001883 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1884 return NULL;
1885
1886 if (tablen != 256) {
1887 PyErr_SetString(PyExc_ValueError,
1888 "translation table must be 256 characters long");
1889 return NULL;
1890 }
1891 }
1892 else {
1893 del_table = NULL;
1894 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001895 }
1896
1897 table = table1;
1898 inlen = PyString_Size(input_obj);
1899 result = PyString_FromStringAndSize((char *)NULL, inlen);
1900 if (result == NULL)
1901 return NULL;
1902 output_start = output = PyString_AsString(result);
1903 input = PyString_AsString(input_obj);
1904
1905 if (dellen == 0) {
1906 /* If no deletions are required, use faster code */
1907 for (i = inlen; --i >= 0; ) {
1908 c = Py_CHARMASK(*input++);
1909 if (Py_CHARMASK((*output++ = table[c])) != c)
1910 changed = 1;
1911 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001912 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913 return result;
1914 Py_DECREF(result);
1915 Py_INCREF(input_obj);
1916 return input_obj;
1917 }
1918
1919 for (i = 0; i < 256; i++)
1920 trans_table[i] = Py_CHARMASK(table[i]);
1921
1922 for (i = 0; i < dellen; i++)
1923 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1924
1925 for (i = inlen; --i >= 0; ) {
1926 c = Py_CHARMASK(*input++);
1927 if (trans_table[c] != -1)
1928 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1929 continue;
1930 changed = 1;
1931 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001932 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001933 Py_DECREF(result);
1934 Py_INCREF(input_obj);
1935 return input_obj;
1936 }
1937 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001938 if (inlen > 0)
1939 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001940 return result;
1941}
1942
1943
1944/* What follows is used for implementing replace(). Perry Stoll. */
1945
/*
  mymemfind

  strstr()-like search over arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the index into MEM of that occurrence, or -1
  when there is none.  A pattern longer than MEM can never match, so
  that case returns -1 as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* Highest index at which a complete copy of PAT still fits;
	   negative when PAT is longer than MEM. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Compare the first byte before paying for a full memcmp(). */
		if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1971
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left to
  right and resuming the search immediately after each match.
    mem=1111  and pat==11 returns 2.
    mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int matches;
	int hit;

	for (matches = 0; len >= 0; matches++) {
		hit = mymemfind(mem, len, pat, pat_len);
		if (hit == -1)
			break;
		/* Step past this match so occurrences cannot overlap. */
		mem += hit + pat_len;
		len -= hit + pat_len;
	}
	return matches;
}
1995
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.

   If STR is empty, if length of PAT is greater than length of STR, or
   if there are no occurrences of PAT in STR, then the original string
   is returned.  Otherwise, a new string is allocated here and returned.

   COUNT limits the number of replacements; a negative COUNT means
   "replace every occurrence".

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would
       be too long to describe with an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Growth per replacement.  When the result would not fit in an
	   int, fail like an allocation failure rather than overflow and
	   under-allocate the output buffer. */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;

	new_len = len + nfound*delta;
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2080
2081
2082static char replace__doc__[] =
2083"S.replace (old, new[, maxsplit]) -> string\n\
2084\n\
2085Return a copy of string S with all occurrences of substring\n\
2086old replaced by new. If the optional argument maxsplit is\n\
2087given, only the first maxsplit occurrences are replaced.";
2088
2089static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002090string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002091{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092 const char *str = PyString_AS_STRING(self), *sub, *repl;
2093 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002094 const int len = PyString_GET_SIZE(self);
2095 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002096 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002097 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002098 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099
Guido van Rossum4c08d552000-03-10 22:55:18 +00002100 if (!PyArg_ParseTuple(args, "OO|i:replace",
2101 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002103
2104 if (PyString_Check(subobj)) {
2105 sub = PyString_AS_STRING(subobj);
2106 sub_len = PyString_GET_SIZE(subobj);
2107 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002108#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002110 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002112#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2114 return NULL;
2115
2116 if (PyString_Check(replobj)) {
2117 repl = PyString_AS_STRING(replobj);
2118 repl_len = PyString_GET_SIZE(replobj);
2119 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002120#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002121 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002122 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002123 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002124#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002125 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2126 return NULL;
2127
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002128 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002129 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002130 return NULL;
2131 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002132 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002133 if (new_s == NULL) {
2134 PyErr_NoMemory();
2135 return NULL;
2136 }
2137 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002138 if (PyString_CheckExact(self)) {
2139 /* we're returning another reference to self */
2140 new = (PyObject*)self;
2141 Py_INCREF(new);
2142 }
2143 else {
2144 new = PyString_FromStringAndSize(str, len);
2145 if (new == NULL)
2146 return NULL;
2147 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148 }
2149 else {
2150 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002151 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152 }
2153 return new;
2154}
2155
2156
2157static char startswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002158"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002160Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161optional start, test S beginning at that position. With optional end, stop\n\
2162comparing S at that position.";
2163
2164static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002165string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002166{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002167 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002169 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 int plen;
2171 int start = 0;
2172 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002173 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174
Guido van Rossumc6821402000-05-08 14:08:05 +00002175 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2176 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002177 return NULL;
2178 if (PyString_Check(subobj)) {
2179 prefix = PyString_AS_STRING(subobj);
2180 plen = PyString_GET_SIZE(subobj);
2181 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002182#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002183 else if (PyUnicode_Check(subobj)) {
2184 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002185 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002186 subobj, start, end, -1);
2187 if (rc == -1)
2188 return NULL;
2189 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002190 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002191 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002192#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194 return NULL;
2195
2196 /* adopt Java semantics for index out of range. it is legal for
2197 * offset to be == plen, but this only returns true if prefix is
2198 * the empty string.
2199 */
2200 if (start < 0 || start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002201 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002202
2203 if (!memcmp(str+start, prefix, plen)) {
2204 /* did the match end after the specified end? */
2205 if (end < 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002206 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002207 else if (end - start < plen)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002208 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002209 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002210 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002211 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002212 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002213}
2214
2215
2216static char endswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002217"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002218\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002219Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002220optional start, test S beginning at that position. With optional end, stop\n\
2221comparing S at that position.";
2222
2223static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002224string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002225{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002227 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 const char* suffix;
2229 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002230 int start = 0;
2231 int end = -1;
2232 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002233 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002234
Guido van Rossumc6821402000-05-08 14:08:05 +00002235 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2236 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002237 return NULL;
2238 if (PyString_Check(subobj)) {
2239 suffix = PyString_AS_STRING(subobj);
2240 slen = PyString_GET_SIZE(subobj);
2241 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002242#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002243 else if (PyUnicode_Check(subobj)) {
2244 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002245 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002246 subobj, start, end, +1);
2247 if (rc == -1)
2248 return NULL;
2249 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002250 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002251 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002252#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002254 return NULL;
2255
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256 if (start < 0 || start > len || slen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002257 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002258
2259 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002260 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002261
Guido van Rossum4c08d552000-03-10 22:55:18 +00002262 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002263 return PyBool_FromLong(1);
2264 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002265}
2266
2267
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002268static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002269"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002270\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002271Encodes S using the codec registered for encoding. encoding defaults\n\
2272to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002273handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2274a ValueError. Other possible values are 'ignore' and 'replace'.";
2275
2276static PyObject *
2277string_encode(PyStringObject *self, PyObject *args)
2278{
2279 char *encoding = NULL;
2280 char *errors = NULL;
2281 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2282 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002283 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2284}
2285
2286
2287static char decode__doc__[] =
2288"S.decode([encoding[,errors]]) -> object\n\
2289\n\
2290Decodes S using the codec registered for encoding. encoding defaults\n\
2291to the default encoding. errors may be given to set a different error\n\
2292handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2293a ValueError. Other possible values are 'ignore' and 'replace'.";
2294
2295static PyObject *
2296string_decode(PyStringObject *self, PyObject *args)
2297{
2298 char *encoding = NULL;
2299 char *errors = NULL;
2300 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2301 return NULL;
2302 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002303}
2304
2305
Guido van Rossum4c08d552000-03-10 22:55:18 +00002306static char expandtabs__doc__[] =
2307"S.expandtabs([tabsize]) -> string\n\
2308\n\
2309Return a copy of S where all tab characters are expanded using spaces.\n\
2310If tabsize is not given, a tab size of 8 characters is assumed.";
2311
2312static PyObject*
2313string_expandtabs(PyStringObject *self, PyObject *args)
2314{
2315 const char *e, *p;
2316 char *q;
2317 int i, j;
2318 PyObject *u;
2319 int tabsize = 8;
2320
2321 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2322 return NULL;
2323
Thomas Wouters7e474022000-07-16 12:04:32 +00002324 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002325 i = j = 0;
2326 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2327 for (p = PyString_AS_STRING(self); p < e; p++)
2328 if (*p == '\t') {
2329 if (tabsize > 0)
2330 j += tabsize - (j % tabsize);
2331 }
2332 else {
2333 j++;
2334 if (*p == '\n' || *p == '\r') {
2335 i += j;
2336 j = 0;
2337 }
2338 }
2339
2340 /* Second pass: create output string and fill it */
2341 u = PyString_FromStringAndSize(NULL, i + j);
2342 if (!u)
2343 return NULL;
2344
2345 j = 0;
2346 q = PyString_AS_STRING(u);
2347
2348 for (p = PyString_AS_STRING(self); p < e; p++)
2349 if (*p == '\t') {
2350 if (tabsize > 0) {
2351 i = tabsize - (j % tabsize);
2352 j += i;
2353 while (i--)
2354 *q++ = ' ';
2355 }
2356 }
2357 else {
2358 j++;
2359 *q++ = *p;
2360 if (*p == '\n' || *p == '\r')
2361 j = 0;
2362 }
2363
2364 return u;
2365}
2366
Tim Peters8fa5dd02001-09-12 02:18:30 +00002367static PyObject *
2368pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369{
2370 PyObject *u;
2371
2372 if (left < 0)
2373 left = 0;
2374 if (right < 0)
2375 right = 0;
2376
Tim Peters8fa5dd02001-09-12 02:18:30 +00002377 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 Py_INCREF(self);
2379 return (PyObject *)self;
2380 }
2381
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002382 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383 left + PyString_GET_SIZE(self) + right);
2384 if (u) {
2385 if (left)
2386 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002387 memcpy(PyString_AS_STRING(u) + left,
2388 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002389 PyString_GET_SIZE(self));
2390 if (right)
2391 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2392 fill, right);
2393 }
2394
2395 return u;
2396}
2397
2398static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002399"S.ljust(width) -> string\n"
2400"\n"
2401"Return S left justified in a string of length width. Padding is\n"
2402"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002403
2404static PyObject *
2405string_ljust(PyStringObject *self, PyObject *args)
2406{
2407 int width;
2408 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2409 return NULL;
2410
Tim Peters8fa5dd02001-09-12 02:18:30 +00002411 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002412 Py_INCREF(self);
2413 return (PyObject*) self;
2414 }
2415
2416 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2417}
2418
2419
2420static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002421"S.rjust(width) -> string\n"
2422"\n"
2423"Return S right justified in a string of length width. Padding is\n"
2424"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002425
2426static PyObject *
2427string_rjust(PyStringObject *self, PyObject *args)
2428{
2429 int width;
2430 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2431 return NULL;
2432
Tim Peters8fa5dd02001-09-12 02:18:30 +00002433 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002434 Py_INCREF(self);
2435 return (PyObject*) self;
2436 }
2437
2438 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2439}
2440
2441
2442static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002443"S.center(width) -> string\n"
2444"\n"
2445"Return S centered in a string of length width. Padding is done\n"
2446"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002447
2448static PyObject *
2449string_center(PyStringObject *self, PyObject *args)
2450{
2451 int marg, left;
2452 int width;
2453
2454 if (!PyArg_ParseTuple(args, "i:center", &width))
2455 return NULL;
2456
Tim Peters8fa5dd02001-09-12 02:18:30 +00002457 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002458 Py_INCREF(self);
2459 return (PyObject*) self;
2460 }
2461
2462 marg = width - PyString_GET_SIZE(self);
2463 left = marg / 2 + (marg & width & 1);
2464
2465 return pad(self, left, marg - left, ' ');
2466}
2467
Walter Dörwald068325e2002-04-15 13:36:47 +00002468static char zfill__doc__[] =
2469"S.zfill(width) -> string\n"
2470"\n"
2471"Pad a numeric string S with zeros on the left, to fill a field\n"
2472"of the specified width. The string S is never truncated.";
2473
2474static PyObject *
2475string_zfill(PyStringObject *self, PyObject *args)
2476{
2477 int fill;
2478 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002479 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002480
2481 int width;
2482 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2483 return NULL;
2484
2485 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002486 if (PyString_CheckExact(self)) {
2487 Py_INCREF(self);
2488 return (PyObject*) self;
2489 }
2490 else
2491 return PyString_FromStringAndSize(
2492 PyString_AS_STRING(self),
2493 PyString_GET_SIZE(self)
2494 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002495 }
2496
2497 fill = width - PyString_GET_SIZE(self);
2498
2499 s = pad(self, fill, 0, '0');
2500
2501 if (s == NULL)
2502 return NULL;
2503
2504 p = PyString_AS_STRING(s);
2505 if (p[fill] == '+' || p[fill] == '-') {
2506 /* move sign to beginning of string */
2507 p[0] = p[fill];
2508 p[fill] = '0';
2509 }
2510
2511 return (PyObject*) s;
2512}
2513
Guido van Rossum4c08d552000-03-10 22:55:18 +00002514static char isspace__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002515"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002516"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002517"Return True if there are only whitespace characters in S,\n"
2518"False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002519
2520static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002521string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002522{
Fred Drakeba096332000-07-09 07:04:36 +00002523 register const unsigned char *p
2524 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002525 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002526
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527 /* Shortcut for single character strings */
2528 if (PyString_GET_SIZE(self) == 1 &&
2529 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002530 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002531
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002532 /* Special case for empty strings */
2533 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002534 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002535
Guido van Rossum4c08d552000-03-10 22:55:18 +00002536 e = p + PyString_GET_SIZE(self);
2537 for (; p < e; p++) {
2538 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002539 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002540 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002541 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542}
2543
2544
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002545static char isalpha__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002546"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002547\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002548Return True if all characters in S are alphabetic\n\
2549and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002550
2551static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002552string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002553{
Fred Drakeba096332000-07-09 07:04:36 +00002554 register const unsigned char *p
2555 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002556 register const unsigned char *e;
2557
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002558 /* Shortcut for single character strings */
2559 if (PyString_GET_SIZE(self) == 1 &&
2560 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002561 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002562
2563 /* Special case for empty strings */
2564 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002565 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002566
2567 e = p + PyString_GET_SIZE(self);
2568 for (; p < e; p++) {
2569 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002570 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002571 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002572 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002573}
2574
2575
2576static char isalnum__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002577"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002578\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002579Return True if all characters in S are alphanumeric\n\
2580and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002581
2582static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002583string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002584{
Fred Drakeba096332000-07-09 07:04:36 +00002585 register const unsigned char *p
2586 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002587 register const unsigned char *e;
2588
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002589 /* Shortcut for single character strings */
2590 if (PyString_GET_SIZE(self) == 1 &&
2591 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002592 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002593
2594 /* Special case for empty strings */
2595 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002596 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002597
2598 e = p + PyString_GET_SIZE(self);
2599 for (; p < e; p++) {
2600 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002601 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002602 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002603 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002604}
2605
2606
Guido van Rossum4c08d552000-03-10 22:55:18 +00002607static char isdigit__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002608"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002609\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002610Return True if there are only digit characters in S,\n\
2611False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002612
2613static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002614string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002615{
Fred Drakeba096332000-07-09 07:04:36 +00002616 register const unsigned char *p
2617 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002618 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002619
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 /* Shortcut for single character strings */
2621 if (PyString_GET_SIZE(self) == 1 &&
2622 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002623 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002625 /* Special case for empty strings */
2626 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002627 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002628
Guido van Rossum4c08d552000-03-10 22:55:18 +00002629 e = p + PyString_GET_SIZE(self);
2630 for (; p < e; p++) {
2631 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002632 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002634 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002635}
2636
2637
2638static char islower__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002639"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002640\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002641Return True if all cased characters in S are lowercase and there is\n\
2642at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002643
2644static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002645string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646{
Fred Drakeba096332000-07-09 07:04:36 +00002647 register const unsigned char *p
2648 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002649 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002650 int cased;
2651
Guido van Rossum4c08d552000-03-10 22:55:18 +00002652 /* Shortcut for single character strings */
2653 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002654 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002656 /* Special case for empty strings */
2657 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002658 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002659
Guido van Rossum4c08d552000-03-10 22:55:18 +00002660 e = p + PyString_GET_SIZE(self);
2661 cased = 0;
2662 for (; p < e; p++) {
2663 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002664 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002665 else if (!cased && islower(*p))
2666 cased = 1;
2667 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002668 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669}
2670
2671
2672static char isupper__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002673"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002674\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002675Return True if all cased characters in S are uppercase and there is\n\
2676at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002677
2678static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002679string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002680{
Fred Drakeba096332000-07-09 07:04:36 +00002681 register const unsigned char *p
2682 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002683 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002684 int cased;
2685
Guido van Rossum4c08d552000-03-10 22:55:18 +00002686 /* Shortcut for single character strings */
2687 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002688 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002689
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002690 /* Special case for empty strings */
2691 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002692 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002693
Guido van Rossum4c08d552000-03-10 22:55:18 +00002694 e = p + PyString_GET_SIZE(self);
2695 cased = 0;
2696 for (; p < e; p++) {
2697 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002698 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002699 else if (!cased && isupper(*p))
2700 cased = 1;
2701 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002702 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002703}
2704
2705
2706static char istitle__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002707"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002708\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002709Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002710may only follow uncased characters and lowercase characters only cased\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002711ones. Return False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002712
2713static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002714string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002715{
Fred Drakeba096332000-07-09 07:04:36 +00002716 register const unsigned char *p
2717 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002718 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002719 int cased, previous_is_cased;
2720
Guido van Rossum4c08d552000-03-10 22:55:18 +00002721 /* Shortcut for single character strings */
2722 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002723 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002724
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002725 /* Special case for empty strings */
2726 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002727 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002728
Guido van Rossum4c08d552000-03-10 22:55:18 +00002729 e = p + PyString_GET_SIZE(self);
2730 cased = 0;
2731 previous_is_cased = 0;
2732 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002733 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002734
2735 if (isupper(ch)) {
2736 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002737 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002738 previous_is_cased = 1;
2739 cased = 1;
2740 }
2741 else if (islower(ch)) {
2742 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002743 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002744 previous_is_cased = 1;
2745 cased = 1;
2746 }
2747 else
2748 previous_is_cased = 0;
2749 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002750 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002751}
2752
2753
2754static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002755"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002756\n\
2757Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002758Line breaks are not included in the resulting list unless keepends\n\
2759is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002760
2761#define SPLIT_APPEND(data, left, right) \
2762 str = PyString_FromStringAndSize(data + left, right - left); \
2763 if (!str) \
2764 goto onError; \
2765 if (PyList_Append(list, str)) { \
2766 Py_DECREF(str); \
2767 goto onError; \
2768 } \
2769 else \
2770 Py_DECREF(str);
2771
2772static PyObject*
2773string_splitlines(PyStringObject *self, PyObject *args)
2774{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002775 register int i;
2776 register int j;
2777 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002778 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002779 PyObject *list;
2780 PyObject *str;
2781 char *data;
2782
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002783 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002784 return NULL;
2785
2786 data = PyString_AS_STRING(self);
2787 len = PyString_GET_SIZE(self);
2788
Guido van Rossum4c08d552000-03-10 22:55:18 +00002789 list = PyList_New(0);
2790 if (!list)
2791 goto onError;
2792
2793 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002794 int eol;
2795
Guido van Rossum4c08d552000-03-10 22:55:18 +00002796 /* Find a line and append it */
2797 while (i < len && data[i] != '\n' && data[i] != '\r')
2798 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002799
2800 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002801 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002802 if (i < len) {
2803 if (data[i] == '\r' && i + 1 < len &&
2804 data[i+1] == '\n')
2805 i += 2;
2806 else
2807 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002808 if (keepends)
2809 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002810 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002811 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002812 j = i;
2813 }
2814 if (j < len) {
2815 SPLIT_APPEND(data, j, len);
2816 }
2817
2818 return list;
2819
2820 onError:
2821 Py_DECREF(list);
2822 return NULL;
2823}
2824
2825#undef SPLIT_APPEND
2826
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002827
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002828static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002829string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002830 /* Counterparts of the obsolete stropmodule functions; except
2831 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002832 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2833 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2834 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2835 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002836 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2837 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2838 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2839 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2840 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2841 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2842 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002843 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
2844 capitalize__doc__},
2845 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2846 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2847 endswith__doc__},
2848 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2849 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2850 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2851 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2852 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2853 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2854 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2855 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2856 startswith__doc__},
2857 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2858 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
2859 swapcase__doc__},
2860 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2861 translate__doc__},
2862 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2863 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2864 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2865 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2866 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
2867 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2868 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2869 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
2870 expandtabs__doc__},
2871 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
2872 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002873 {NULL, NULL} /* sentinel */
2874};
2875
Guido van Rossumae960af2001-08-30 03:11:59 +00002876staticforward PyObject *
2877str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2878
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002879static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002880string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002881{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002882 PyObject *x = NULL;
2883 static char *kwlist[] = {"object", 0};
2884
Guido van Rossumae960af2001-08-30 03:11:59 +00002885 if (type != &PyString_Type)
2886 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002887 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2888 return NULL;
2889 if (x == NULL)
2890 return PyString_FromString("");
2891 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002892}
2893
Guido van Rossumae960af2001-08-30 03:11:59 +00002894static PyObject *
2895str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2896{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002897 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002898 int n;
2899
2900 assert(PyType_IsSubtype(type, &PyString_Type));
2901 tmp = string_new(&PyString_Type, args, kwds);
2902 if (tmp == NULL)
2903 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002904 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002905 n = PyString_GET_SIZE(tmp);
2906 pnew = type->tp_alloc(type, n);
2907 if (pnew != NULL) {
2908 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002909 ((PyStringObject *)pnew)->ob_shash =
2910 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002911 ((PyStringObject *)pnew)->ob_sinterned =
2912 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002913 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002914 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002915 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002916}
2917
Guido van Rossumcacfc072002-05-24 19:01:59 +00002918static PyObject *
2919basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2920{
2921 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002922 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00002923 return NULL;
2924}
2925
2926static char basestring_doc[] =
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002927"Type basestring cannot be instantiated; it is the base for str and unicode.";
Guido van Rossumcacfc072002-05-24 19:01:59 +00002928
2929PyTypeObject PyBaseString_Type = {
2930 PyObject_HEAD_INIT(&PyType_Type)
2931 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002932 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00002933 0,
2934 0,
2935 0, /* tp_dealloc */
2936 0, /* tp_print */
2937 0, /* tp_getattr */
2938 0, /* tp_setattr */
2939 0, /* tp_compare */
2940 0, /* tp_repr */
2941 0, /* tp_as_number */
2942 0, /* tp_as_sequence */
2943 0, /* tp_as_mapping */
2944 0, /* tp_hash */
2945 0, /* tp_call */
2946 0, /* tp_str */
2947 0, /* tp_getattro */
2948 0, /* tp_setattro */
2949 0, /* tp_as_buffer */
2950 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
2951 basestring_doc, /* tp_doc */
2952 0, /* tp_traverse */
2953 0, /* tp_clear */
2954 0, /* tp_richcompare */
2955 0, /* tp_weaklistoffset */
2956 0, /* tp_iter */
2957 0, /* tp_iternext */
2958 0, /* tp_methods */
2959 0, /* tp_members */
2960 0, /* tp_getset */
2961 &PyBaseObject_Type, /* tp_base */
2962 0, /* tp_dict */
2963 0, /* tp_descr_get */
2964 0, /* tp_descr_set */
2965 0, /* tp_dictoffset */
2966 0, /* tp_init */
2967 0, /* tp_alloc */
2968 basestring_new, /* tp_new */
2969 0, /* tp_free */
2970};
2971
Tim Peters6d6c1a32001-08-02 04:15:00 +00002972static char string_doc[] =
2973"str(object) -> string\n\
2974\n\
2975Return a nice string representation of the object.\n\
2976If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002977
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002978PyTypeObject PyString_Type = {
2979 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002980 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002981 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002982 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002983 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002984 (destructor)string_dealloc, /* tp_dealloc */
2985 (printfunc)string_print, /* tp_print */
2986 0, /* tp_getattr */
2987 0, /* tp_setattr */
2988 0, /* tp_compare */
2989 (reprfunc)string_repr, /* tp_repr */
2990 0, /* tp_as_number */
2991 &string_as_sequence, /* tp_as_sequence */
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00002992 &string_as_mapping, /* tp_as_mapping */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002993 (hashfunc)string_hash, /* tp_hash */
2994 0, /* tp_call */
2995 (reprfunc)string_str, /* tp_str */
2996 PyObject_GenericGetAttr, /* tp_getattro */
2997 0, /* tp_setattro */
2998 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002999 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003000 string_doc, /* tp_doc */
3001 0, /* tp_traverse */
3002 0, /* tp_clear */
3003 (richcmpfunc)string_richcompare, /* tp_richcompare */
3004 0, /* tp_weaklistoffset */
3005 0, /* tp_iter */
3006 0, /* tp_iternext */
3007 string_methods, /* tp_methods */
3008 0, /* tp_members */
3009 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00003010 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00003011 0, /* tp_dict */
3012 0, /* tp_descr_get */
3013 0, /* tp_descr_set */
3014 0, /* tp_dictoffset */
3015 0, /* tp_init */
3016 0, /* tp_alloc */
3017 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00003018 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003019};
3020
3021void
Fred Drakeba096332000-07-09 07:04:36 +00003022PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003023{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003024 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00003025 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003026 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003027 if (w == NULL || !PyString_Check(*pv)) {
3028 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00003029 *pv = NULL;
3030 return;
3031 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003032 v = string_concat((PyStringObject *) *pv, w);
3033 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003034 *pv = v;
3035}
3036
Guido van Rossum013142a1994-08-30 08:19:36 +00003037void
Fred Drakeba096332000-07-09 07:04:36 +00003038PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00003039{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003040 PyString_Concat(pv, w);
3041 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00003042}
3043
3044
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003045/* The following function breaks the notion that strings are immutable:
3046 it changes the size of a string. We get away with this only if there
3047 is only one module referencing the object. You can also think of it
3048 as creating a new string object and destroying the old one, only
3049 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00003050 already be known to some other part of the code...
3051 Note that if there's not enough memory to resize the string, the original
3052 string object at *pv is deallocated, *pv is set to NULL, an "out of
3053 memory" exception is set, and -1 is returned. Else (on success) 0 is
3054 returned, and the value in *pv may or may not be the same as on input.
3055 As always, an extra byte is allocated for a trailing \0 byte (newsize
3056 does *not* include that), and a trailing \0 byte is stored.
3057*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003058
3059int
Fred Drakeba096332000-07-09 07:04:36 +00003060_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003061{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003062 register PyObject *v;
3063 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003064 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003065 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003066 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003067 Py_DECREF(v);
3068 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003069 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003070 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003071 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00003072#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00003073 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00003074#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003075 _Py_ForgetReference(v);
3076 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003077 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003078 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003079 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003080 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003081 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003082 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003083 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003084 _Py_NewReference(*pv);
3085 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003086 sv->ob_size = newsize;
3087 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003088 return 0;
3089}
Guido van Rossume5372401993-03-16 12:15:04 +00003090
3091/* Helpers for formatstring */
3092
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003093static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003094getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003095{
3096 int argidx = *p_argidx;
3097 if (argidx < arglen) {
3098 (*p_argidx)++;
3099 if (arglen < 0)
3100 return args;
3101 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003102 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003103 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003104 PyErr_SetString(PyExc_TypeError,
3105 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003106 return NULL;
3107}
3108
/* Format flag bits, one per flag character that may follow the '%'
 * of a conversion specification.  They are OR-ed together into `flags'.
 */
#define F_LJUST 0x01	/* '-' */
#define F_SIGN  0x02	/* '+' */
#define F_BLANK 0x04	/* ' ' */
#define F_ALT   0x08	/* '#' */
#define F_ZERO  0x10	/* '0' */
3121
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003122static int
Fred Drakeba096332000-07-09 07:04:36 +00003123formatfloat(char *buf, size_t buflen, int flags,
3124 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003125{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003126 /* fmt = '%#.' + `prec` + `type`
3127 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003128 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003129 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003130 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003131 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003132 if (prec < 0)
3133 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003134 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3135 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003136 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3137 (flags&F_ALT) ? "#" : "",
3138 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003139 /* worst case length calc to ensure no buffer overrun:
3140 fmt = %#.<prec>g
3141 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003142 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003143 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3144 If prec=0 the effective precision is 1 (the leading digit is
3145 always given), therefore increase by one to 10+prec. */
3146 if (buflen <= (size_t)10 + (size_t)prec) {
3147 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003148 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003149 return -1;
3150 }
Tim Peters885d4572001-11-28 20:27:42 +00003151 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003152 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003153}
3154
Tim Peters38fd5b62000-09-21 05:43:11 +00003155/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3156 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3157 * Python's regular ints.
3158 * Return value: a new PyString*, or NULL if error.
3159 * . *pbuf is set to point into it,
3160 * *plen set to the # of chars following that.
3161 * Caller must decref it when done using pbuf.
3162 * The string starting at *pbuf is of the form
3163 * "-"? ("0x" | "0X")? digit+
3164 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003165 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003166 * There will be at least prec digits, zero-filled on the left if
3167 * necessary to get that many.
3168 * val object to be converted
3169 * flags bitmask of format flags; only F_ALT is looked at
3170 * prec minimum number of digits; 0-fill on left if needed
3171 * type a character in [duoxX]; u acts the same as d
3172 *
3173 * CAUTION: o, x and X conversions on regular ints can never
3174 * produce a '-' sign, but can for Python's unbounded ints.
3175 */
3176PyObject*
3177_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3178 char **pbuf, int *plen)
3179{
3180 PyObject *result = NULL;
3181 char *buf;
3182 int i;
3183 int sign; /* 1 if '-', else 0 */
3184 int len; /* number of characters */
3185 int numdigits; /* len == numnondigits + numdigits */
3186 int numnondigits = 0;
3187
3188 switch (type) {
3189 case 'd':
3190 case 'u':
3191 result = val->ob_type->tp_str(val);
3192 break;
3193 case 'o':
3194 result = val->ob_type->tp_as_number->nb_oct(val);
3195 break;
3196 case 'x':
3197 case 'X':
3198 numnondigits = 2;
3199 result = val->ob_type->tp_as_number->nb_hex(val);
3200 break;
3201 default:
3202 assert(!"'type' not in [duoxX]");
3203 }
3204 if (!result)
3205 return NULL;
3206
3207 /* To modify the string in-place, there can only be one reference. */
3208 if (result->ob_refcnt != 1) {
3209 PyErr_BadInternalCall();
3210 return NULL;
3211 }
3212 buf = PyString_AsString(result);
3213 len = PyString_Size(result);
3214 if (buf[len-1] == 'L') {
3215 --len;
3216 buf[len] = '\0';
3217 }
3218 sign = buf[0] == '-';
3219 numnondigits += sign;
3220 numdigits = len - numnondigits;
3221 assert(numdigits > 0);
3222
Tim Petersfff53252001-04-12 18:38:48 +00003223 /* Get rid of base marker unless F_ALT */
3224 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003225 /* Need to skip 0x, 0X or 0. */
3226 int skipped = 0;
3227 switch (type) {
3228 case 'o':
3229 assert(buf[sign] == '0');
3230 /* If 0 is only digit, leave it alone. */
3231 if (numdigits > 1) {
3232 skipped = 1;
3233 --numdigits;
3234 }
3235 break;
3236 case 'x':
3237 case 'X':
3238 assert(buf[sign] == '0');
3239 assert(buf[sign + 1] == 'x');
3240 skipped = 2;
3241 numnondigits -= 2;
3242 break;
3243 }
3244 if (skipped) {
3245 buf += skipped;
3246 len -= skipped;
3247 if (sign)
3248 buf[0] = '-';
3249 }
3250 assert(len == numnondigits + numdigits);
3251 assert(numdigits > 0);
3252 }
3253
3254 /* Fill with leading zeroes to meet minimum width. */
3255 if (prec > numdigits) {
3256 PyObject *r1 = PyString_FromStringAndSize(NULL,
3257 numnondigits + prec);
3258 char *b1;
3259 if (!r1) {
3260 Py_DECREF(result);
3261 return NULL;
3262 }
3263 b1 = PyString_AS_STRING(r1);
3264 for (i = 0; i < numnondigits; ++i)
3265 *b1++ = *buf++;
3266 for (i = 0; i < prec - numdigits; i++)
3267 *b1++ = '0';
3268 for (i = 0; i < numdigits; i++)
3269 *b1++ = *buf++;
3270 *b1 = '\0';
3271 Py_DECREF(result);
3272 result = r1;
3273 buf = PyString_AS_STRING(result);
3274 len = numnondigits + prec;
3275 }
3276
3277 /* Fix up case for hex conversions. */
3278 switch (type) {
3279 case 'x':
3280 /* Need to convert all upper case letters to lower case. */
3281 for (i = 0; i < len; i++)
3282 if (buf[i] >= 'A' && buf[i] <= 'F')
3283 buf[i] += 'a'-'A';
3284 break;
3285 case 'X':
3286 /* Need to convert 0x to 0X (and -0x to -0X). */
3287 if (buf[sign + 1] == 'x')
3288 buf[sign + 1] = 'X';
3289 break;
3290 }
3291 *pbuf = buf;
3292 *plen = len;
3293 return result;
3294}
3295
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003296static int
Fred Drakeba096332000-07-09 07:04:36 +00003297formatint(char *buf, size_t buflen, int flags,
3298 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003299{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003300 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003301 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3302 + 1 + 1 = 24 */
3303 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003304 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003305
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003306 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003307 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003308 if (prec < 0)
3309 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003310
3311 if ((flags & F_ALT) &&
3312 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003313 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003314 * of issues that cause pain:
3315 * - when 0 is being converted, the C standard leaves off
3316 * the '0x' or '0X', which is inconsistent with other
3317 * %#x/%#X conversions and inconsistent with Python's
3318 * hex() function
3319 * - there are platforms that violate the standard and
3320 * convert 0 with the '0x' or '0X'
3321 * (Metrowerks, Compaq Tru64)
3322 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003323 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003324 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003325 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003326 * We can achieve the desired consistency by inserting our
3327 * own '0x' or '0X' prefix, and substituting %x/%X in place
3328 * of %#x/%#X.
3329 *
3330 * Note that this is the same approach as used in
3331 * formatint() in unicodeobject.c
3332 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003333 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003334 type, prec, type);
3335 }
3336 else {
3337 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003338 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003339 prec, type);
3340 }
3341
Tim Peters38fd5b62000-09-21 05:43:11 +00003342 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003343 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3344 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003345 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003346 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003347 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003348 return -1;
3349 }
Tim Peters885d4572001-11-28 20:27:42 +00003350 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003351 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003352}
3353
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003354static int
Fred Drakeba096332000-07-09 07:04:36 +00003355formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003356{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003357 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003358 if (PyString_Check(v)) {
3359 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003360 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003361 }
3362 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003363 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003364 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003365 }
3366 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003367 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003368}
3369
Guido van Rossum013142a1994-08-30 08:19:36 +00003370
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast stays attached
   to the constant wherever the macro is used in an expression.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00003380
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003381PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003382PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003383{
3384 char *fmt, *res;
3385 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003386 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003387 PyObject *result, *orig_args;
3388#ifdef Py_USING_UNICODE
3389 PyObject *v, *w;
3390#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003391 PyObject *dict = NULL;
3392 if (format == NULL || !PyString_Check(format) || args == NULL) {
3393 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003394 return NULL;
3395 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003396 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003397 fmt = PyString_AS_STRING(format);
3398 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003399 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003400 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003401 if (result == NULL)
3402 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003403 res = PyString_AsString(result);
3404 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003405 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003406 argidx = 0;
3407 }
3408 else {
3409 arglen = -1;
3410 argidx = -2;
3411 }
Michael W. Hudson5efaf7e2002-06-11 10:55:12 +00003412 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args))
Guido van Rossum013142a1994-08-30 08:19:36 +00003413 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003414 while (--fmtcnt >= 0) {
3415 if (*fmt != '%') {
3416 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003417 rescnt = fmtcnt + 100;
3418 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003419 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003420 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003421 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003422 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003423 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003424 }
3425 *res++ = *fmt++;
3426 }
3427 else {
3428 /* Got a format specifier */
3429 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003430 int width = -1;
3431 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003432 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003433 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003434 PyObject *v = NULL;
3435 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003436 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003437 int sign;
3438 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003439 char formatbuf[FORMATBUFLEN];
3440 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003441#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003442 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003443 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003444#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003445
Guido van Rossumda9c2711996-12-05 21:58:58 +00003446 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003447 if (*fmt == '(') {
3448 char *keystart;
3449 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003450 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003451 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003452
3453 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003454 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003455 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003456 goto error;
3457 }
3458 ++fmt;
3459 --fmtcnt;
3460 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003461 /* Skip over balanced parentheses */
3462 while (pcount > 0 && --fmtcnt >= 0) {
3463 if (*fmt == ')')
3464 --pcount;
3465 else if (*fmt == '(')
3466 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003467 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003468 }
3469 keylen = fmt - keystart - 1;
3470 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003471 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003472 "incomplete format key");
3473 goto error;
3474 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003475 key = PyString_FromStringAndSize(keystart,
3476 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003477 if (key == NULL)
3478 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003479 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003480 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003481 args_owned = 0;
3482 }
3483 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003484 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003485 if (args == NULL) {
3486 goto error;
3487 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003488 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003489 arglen = -1;
3490 argidx = -2;
3491 }
Guido van Rossume5372401993-03-16 12:15:04 +00003492 while (--fmtcnt >= 0) {
3493 switch (c = *fmt++) {
3494 case '-': flags |= F_LJUST; continue;
3495 case '+': flags |= F_SIGN; continue;
3496 case ' ': flags |= F_BLANK; continue;
3497 case '#': flags |= F_ALT; continue;
3498 case '0': flags |= F_ZERO; continue;
3499 }
3500 break;
3501 }
3502 if (c == '*') {
3503 v = getnextarg(args, arglen, &argidx);
3504 if (v == NULL)
3505 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003506 if (!PyInt_Check(v)) {
3507 PyErr_SetString(PyExc_TypeError,
3508 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003509 goto error;
3510 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003511 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003512 if (width < 0) {
3513 flags |= F_LJUST;
3514 width = -width;
3515 }
Guido van Rossume5372401993-03-16 12:15:04 +00003516 if (--fmtcnt >= 0)
3517 c = *fmt++;
3518 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003519 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003520 width = c - '0';
3521 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003522 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003523 if (!isdigit(c))
3524 break;
3525 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003526 PyErr_SetString(
3527 PyExc_ValueError,
3528 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003529 goto error;
3530 }
3531 width = width*10 + (c - '0');
3532 }
3533 }
3534 if (c == '.') {
3535 prec = 0;
3536 if (--fmtcnt >= 0)
3537 c = *fmt++;
3538 if (c == '*') {
3539 v = getnextarg(args, arglen, &argidx);
3540 if (v == NULL)
3541 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003542 if (!PyInt_Check(v)) {
3543 PyErr_SetString(
3544 PyExc_TypeError,
3545 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003546 goto error;
3547 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003548 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003549 if (prec < 0)
3550 prec = 0;
3551 if (--fmtcnt >= 0)
3552 c = *fmt++;
3553 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003554 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003555 prec = c - '0';
3556 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003557 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003558 if (!isdigit(c))
3559 break;
3560 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003561 PyErr_SetString(
3562 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003563 "prec too big");
3564 goto error;
3565 }
3566 prec = prec*10 + (c - '0');
3567 }
3568 }
3569 } /* prec */
3570 if (fmtcnt >= 0) {
3571 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003572 if (--fmtcnt >= 0)
3573 c = *fmt++;
3574 }
3575 }
3576 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003577 PyErr_SetString(PyExc_ValueError,
3578 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003579 goto error;
3580 }
3581 if (c != '%') {
3582 v = getnextarg(args, arglen, &argidx);
3583 if (v == NULL)
3584 goto error;
3585 }
3586 sign = 0;
3587 fill = ' ';
3588 switch (c) {
3589 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003590 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003591 len = 1;
3592 break;
3593 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003594 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003595#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003596 if (PyUnicode_Check(v)) {
3597 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003598 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003599 goto unicode;
3600 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003601#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003602 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003603 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003604 else
3605 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003606 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003607 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003608 if (!PyString_Check(temp)) {
3609 PyErr_SetString(PyExc_TypeError,
3610 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003611 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003612 goto error;
3613 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003614 pbuf = PyString_AS_STRING(temp);
3615 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003616 if (prec >= 0 && len > prec)
3617 len = prec;
3618 break;
3619 case 'i':
3620 case 'd':
3621 case 'u':
3622 case 'o':
3623 case 'x':
3624 case 'X':
3625 if (c == 'i')
3626 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003627 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003628 temp = _PyString_FormatLong(v, flags,
3629 prec, c, &pbuf, &len);
3630 if (!temp)
3631 goto error;
3632 /* unbounded ints can always produce
3633 a sign character! */
3634 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003635 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003636 else {
3637 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003638 len = formatint(pbuf,
3639 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003640 flags, prec, c, v);
3641 if (len < 0)
3642 goto error;
3643 /* only d conversion is signed */
3644 sign = c == 'd';
3645 }
3646 if (flags & F_ZERO)
3647 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003648 break;
3649 case 'e':
3650 case 'E':
3651 case 'f':
3652 case 'g':
3653 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003654 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003655 len = formatfloat(pbuf, sizeof(formatbuf),
3656 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003657 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003658 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003659 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003660 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003661 fill = '0';
3662 break;
3663 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003664 pbuf = formatbuf;
3665 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003666 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003667 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003668 break;
3669 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003670 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003671 "unsupported format character '%c' (0x%x) "
3672 "at index %i",
3673 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003674 goto error;
3675 }
3676 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003677 if (*pbuf == '-' || *pbuf == '+') {
3678 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003679 len--;
3680 }
3681 else if (flags & F_SIGN)
3682 sign = '+';
3683 else if (flags & F_BLANK)
3684 sign = ' ';
3685 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003686 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003687 }
3688 if (width < len)
3689 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003690 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003691 reslen -= rescnt;
3692 rescnt = width + fmtcnt + 100;
3693 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003694 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003695 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003696 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003697 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003698 }
3699 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003700 if (fill != ' ')
3701 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003702 rescnt--;
3703 if (width > len)
3704 width--;
3705 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003706 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3707 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003708 assert(pbuf[1] == c);
3709 if (fill != ' ') {
3710 *res++ = *pbuf++;
3711 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003712 }
Tim Petersfff53252001-04-12 18:38:48 +00003713 rescnt -= 2;
3714 width -= 2;
3715 if (width < 0)
3716 width = 0;
3717 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003718 }
3719 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003720 do {
3721 --rescnt;
3722 *res++ = fill;
3723 } while (--width > len);
3724 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003725 if (fill == ' ') {
3726 if (sign)
3727 *res++ = sign;
3728 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003729 (c == 'x' || c == 'X')) {
3730 assert(pbuf[0] == '0');
3731 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003732 *res++ = *pbuf++;
3733 *res++ = *pbuf++;
3734 }
3735 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003736 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003737 res += len;
3738 rescnt -= len;
3739 while (--width >= len) {
3740 --rescnt;
3741 *res++ = ' ';
3742 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003743 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003744 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003745 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003746 goto error;
3747 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003748 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003749 } /* '%' */
3750 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003751 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003752 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003753 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003754 goto error;
3755 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003756 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003757 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003758 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003759 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003760 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003761
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003762#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003763 unicode:
3764 if (args_owned) {
3765 Py_DECREF(args);
3766 args_owned = 0;
3767 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003768 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003769 if (PyTuple_Check(orig_args) && argidx > 0) {
3770 PyObject *v;
3771 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3772 v = PyTuple_New(n);
3773 if (v == NULL)
3774 goto error;
3775 while (--n >= 0) {
3776 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3777 Py_INCREF(w);
3778 PyTuple_SET_ITEM(v, n, w);
3779 }
3780 args = v;
3781 } else {
3782 Py_INCREF(orig_args);
3783 args = orig_args;
3784 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003785 args_owned = 1;
3786 /* Take what we have of the result and let the Unicode formatting
3787 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003788 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003789 if (_PyString_Resize(&result, rescnt))
3790 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003791 fmtcnt = PyString_GET_SIZE(format) - \
3792 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003793 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3794 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003795 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003796 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003797 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003798 if (v == NULL)
3799 goto error;
3800 /* Paste what we have (result) to what the Unicode formatting
3801 function returned (v) and return the result (or error) */
3802 w = PyUnicode_Concat(result, v);
3803 Py_DECREF(result);
3804 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003805 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003806 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003807#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003808
Guido van Rossume5372401993-03-16 12:15:04 +00003809 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003810 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003811 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003812 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003813 }
Guido van Rossume5372401993-03-16 12:15:04 +00003814 return NULL;
3815}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003816
3817
Guido van Rossum2a61e741997-01-18 07:55:05 +00003818
Barry Warsaw4df762f2000-08-16 23:41:01 +00003819/* This dictionary will leak at PyString_Fini() time. That's acceptable
3820 * because PyString_Fini() specifically frees interned strings that are
3821 * only referenced by this dictionary. The CVS log entry for revision 2.45
3822 * says:
3823 *
3824 * Change the Fini function to only remove otherwise unreferenced
3825 * strings from the interned table. There are references in
3826 * hard-to-find static variables all over the interpreter, and it's not
3827 * worth trying to get rid of all those; but "uninterning" isn't fair
3828 * either and may cause subtle failures later -- so we have to keep them
3829 * in the interned table.
3830 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003831static PyObject *interned;
3832
3833void
Fred Drakeba096332000-07-09 07:04:36 +00003834PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003835{
3836 register PyStringObject *s = (PyStringObject *)(*p);
3837 PyObject *t;
3838 if (s == NULL || !PyString_Check(s))
3839 Py_FatalError("PyString_InternInPlace: strings only please!");
3840 if ((t = s->ob_sinterned) != NULL) {
3841 if (t == (PyObject *)s)
3842 return;
3843 Py_INCREF(t);
3844 *p = t;
3845 Py_DECREF(s);
3846 return;
3847 }
3848 if (interned == NULL) {
3849 interned = PyDict_New();
3850 if (interned == NULL)
3851 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003852 }
3853 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3854 Py_INCREF(t);
3855 *p = s->ob_sinterned = t;
3856 Py_DECREF(s);
3857 return;
3858 }
Tim Peters111f6092001-09-12 07:54:51 +00003859 /* Ensure that only true string objects appear in the intern dict,
3860 and as the value of ob_sinterned. */
3861 if (PyString_CheckExact(s)) {
3862 t = (PyObject *)s;
3863 if (PyDict_SetItem(interned, t, t) == 0) {
3864 s->ob_sinterned = t;
3865 return;
3866 }
3867 }
3868 else {
3869 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3870 PyString_GET_SIZE(s));
3871 if (t != NULL) {
3872 if (PyDict_SetItem(interned, t, t) == 0) {
3873 *p = s->ob_sinterned = t;
3874 Py_DECREF(s);
3875 return;
3876 }
3877 Py_DECREF(t);
3878 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003879 }
3880 PyErr_Clear();
3881}
3882
3883
3884PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003885PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003886{
3887 PyObject *s = PyString_FromString(cp);
3888 if (s == NULL)
3889 return NULL;
3890 PyString_InternInPlace(&s);
3891 return s;
3892}
3893
Guido van Rossum8cf04761997-08-02 02:57:45 +00003894void
Fred Drakeba096332000-07-09 07:04:36 +00003895PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003896{
3897 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003898 for (i = 0; i < UCHAR_MAX + 1; i++) {
3899 Py_XDECREF(characters[i]);
3900 characters[i] = NULL;
3901 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003902 Py_XDECREF(nullstring);
3903 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003904 if (interned) {
3905 int pos, changed;
3906 PyObject *key, *value;
3907 do {
3908 changed = 0;
3909 pos = 0;
3910 while (PyDict_Next(interned, &pos, &key, &value)) {
3911 if (key->ob_refcnt == 2 && key == value) {
3912 PyDict_DelItem(interned, key);
3913 changed = 1;
3914 }
3915 }
3916 } while (changed);
3917 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003918}
Barry Warsawa903ad982001-02-23 16:40:48 +00003919
Barry Warsawa903ad982001-02-23 16:40:48 +00003920void _Py_ReleaseInternedStrings(void)
3921{
3922 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003923 fprintf(stderr, "releasing interned strings\n");
3924 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003925 Py_DECREF(interned);
3926 interned = NULL;
3927 }
3928}