blob: 89e414af47636632726d04499693f6976cfe7494 [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
159 count = vargs;
160#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000161 /* step 1: figure out how large a buffer we need */
162 for (f = format; *f; f++) {
163 if (*f == '%') {
164 const char* p = f;
165 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
166 ;
167
168 /* skip the 'l' in %ld, since it doesn't change the
169 width. although only %d is supported (see
170 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000171 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000172 if (*f == 'l' && *(f+1) == 'd')
173 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000174
Barry Warsawdadace02001-08-24 18:32:06 +0000175 switch (*f) {
176 case 'c':
177 (void)va_arg(count, int);
178 /* fall through... */
179 case '%':
180 n++;
181 break;
182 case 'd': case 'i': case 'x':
183 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000184 /* 20 bytes is enough to hold a 64-bit
185 integer. Decimal takes the most space.
186 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 n += 20;
188 break;
189 case 's':
190 s = va_arg(count, char*);
191 n += strlen(s);
192 break;
193 case 'p':
194 (void) va_arg(count, int);
195 /* maximum 64-bit pointer representation:
196 * 0xffffffffffffffff
197 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000198 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000199 */
200 n += 19;
201 break;
202 default:
203 /* if we stumble upon an unknown
204 formatting code, copy the rest of
205 the format string to the output
206 string. (we cannot just skip the
207 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000208 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000209 n += strlen(p);
210 goto expand;
211 }
212 } else
213 n++;
214 }
215 expand:
216 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000217 /* Since we've analyzed how much space we need for the worst case,
218 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 string = PyString_FromStringAndSize(NULL, n);
220 if (!string)
221 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000222
Barry Warsawdadace02001-08-24 18:32:06 +0000223 s = PyString_AsString(string);
224
225 for (f = format; *f; f++) {
226 if (*f == '%') {
227 const char* p = f++;
228 int i, longflag = 0;
229 /* parse the width.precision part (we're only
230 interested in the precision value, if any) */
231 n = 0;
232 while (isdigit(Py_CHARMASK(*f)))
233 n = (n*10) + *f++ - '0';
234 if (*f == '.') {
235 f++;
236 n = 0;
237 while (isdigit(Py_CHARMASK(*f)))
238 n = (n*10) + *f++ - '0';
239 }
240 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
241 f++;
242 /* handle the long flag, but only for %ld. others
243 can be added when necessary. */
244 if (*f == 'l' && *(f+1) == 'd') {
245 longflag = 1;
246 ++f;
247 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000248
Barry Warsawdadace02001-08-24 18:32:06 +0000249 switch (*f) {
250 case 'c':
251 *s++ = va_arg(vargs, int);
252 break;
253 case 'd':
254 if (longflag)
255 sprintf(s, "%ld", va_arg(vargs, long));
256 else
257 sprintf(s, "%d", va_arg(vargs, int));
258 s += strlen(s);
259 break;
260 case 'i':
261 sprintf(s, "%i", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'x':
265 sprintf(s, "%x", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 's':
269 p = va_arg(vargs, char*);
270 i = strlen(p);
271 if (n > 0 && i > n)
272 i = n;
273 memcpy(s, p, i);
274 s += i;
275 break;
276 case 'p':
277 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000278 /* %p is ill-defined: ensure leading 0x. */
279 if (s[1] == 'X')
280 s[1] = 'x';
281 else if (s[1] != 'x') {
282 memmove(s+2, s, strlen(s)+1);
283 s[0] = '0';
284 s[1] = 'x';
285 }
Barry Warsawdadace02001-08-24 18:32:06 +0000286 s += strlen(s);
287 break;
288 case '%':
289 *s++ = '%';
290 break;
291 default:
292 strcpy(s, p);
293 s += strlen(s);
294 goto end;
295 }
296 } else
297 *s++ = *f;
298 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000299
Barry Warsawdadace02001-08-24 18:32:06 +0000300 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000301 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000302 return string;
303}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000304
Barry Warsawdadace02001-08-24 18:32:06 +0000305PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000306PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000307{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000308 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000309 va_list vargs;
310
311#ifdef HAVE_STDARG_PROTOTYPES
312 va_start(vargs, format);
313#else
314 va_start(vargs);
315#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 ret = PyString_FromFormatV(format, vargs);
317 va_end(vargs);
318 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319}
320
321
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000322PyObject *PyString_Decode(const char *s,
323 int size,
324 const char *encoding,
325 const char *errors)
326{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000327 PyObject *v, *str;
328
329 str = PyString_FromStringAndSize(s, size);
330 if (str == NULL)
331 return NULL;
332 v = PyString_AsDecodedString(str, encoding, errors);
333 Py_DECREF(str);
334 return v;
335}
336
337PyObject *PyString_AsDecodedObject(PyObject *str,
338 const char *encoding,
339 const char *errors)
340{
341 PyObject *v;
342
343 if (!PyString_Check(str)) {
344 PyErr_BadArgument();
345 goto onError;
346 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000347
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000348 if (encoding == NULL) {
349#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000351#else
352 PyErr_SetString(PyExc_ValueError, "no encoding specified");
353 goto onError;
354#endif
355 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356
357 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000358 v = PyCodec_Decode(str, encoding, errors);
359 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361
362 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000363
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 return NULL;
366}
367
368PyObject *PyString_AsDecodedString(PyObject *str,
369 const char *encoding,
370 const char *errors)
371{
372 PyObject *v;
373
374 v = PyString_AsDecodedObject(str, encoding, errors);
375 if (v == NULL)
376 goto onError;
377
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000378#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000379 /* Convert Unicode to a string using the default encoding */
380 if (PyUnicode_Check(v)) {
381 PyObject *temp = v;
382 v = PyUnicode_AsEncodedString(v, NULL, NULL);
383 Py_DECREF(temp);
384 if (v == NULL)
385 goto onError;
386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 if (!PyString_Check(v)) {
389 PyErr_Format(PyExc_TypeError,
390 "decoder did not return a string object (type=%.400s)",
391 v->ob_type->tp_name);
392 Py_DECREF(v);
393 goto onError;
394 }
395
396 return v;
397
398 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 return NULL;
400}
401
402PyObject *PyString_Encode(const char *s,
403 int size,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 str = PyString_FromStringAndSize(s, size);
410 if (str == NULL)
411 return NULL;
412 v = PyString_AsEncodedString(str, encoding, errors);
413 Py_DECREF(str);
414 return v;
415}
416
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 const char *encoding,
419 const char *errors)
420{
421 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000422
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000423 if (!PyString_Check(str)) {
424 PyErr_BadArgument();
425 goto onError;
426 }
427
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000428 if (encoding == NULL) {
429#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000431#else
432 PyErr_SetString(PyExc_ValueError, "no encoding specified");
433 goto onError;
434#endif
435 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436
437 /* Encode via the codec registry */
438 v = PyCodec_Encode(str, encoding, errors);
439 if (v == NULL)
440 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000441
442 return v;
443
444 onError:
445 return NULL;
446}
447
448PyObject *PyString_AsEncodedString(PyObject *str,
449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v;
453
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000454 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000455 if (v == NULL)
456 goto onError;
457
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000458#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 /* Convert Unicode to a string using the default encoding */
460 if (PyUnicode_Check(v)) {
461 PyObject *temp = v;
462 v = PyUnicode_AsEncodedString(v, NULL, NULL);
463 Py_DECREF(temp);
464 if (v == NULL)
465 goto onError;
466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(v)) {
469 PyErr_Format(PyExc_TypeError,
470 "encoder did not return a string object (type=%.400s)",
471 v->ob_type->tp_name);
472 Py_DECREF(v);
473 goto onError;
474 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000476 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000477
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 onError:
479 return NULL;
480}
481
Guido van Rossum234f9421993-06-17 12:35:49 +0000482static void
Fred Drakeba096332000-07-09 07:04:36 +0000483string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000484{
Guido van Rossum9475a232001-10-05 20:51:39 +0000485 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000486}
487
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000488static int
489string_getsize(register PyObject *op)
490{
491 char *s;
492 int len;
493 if (PyString_AsStringAndSize(op, &s, &len))
494 return -1;
495 return len;
496}
497
498static /*const*/ char *
499string_getbuffer(register PyObject *op)
500{
501 char *s;
502 int len;
503 if (PyString_AsStringAndSize(op, &s, &len))
504 return NULL;
505 return s;
506}
507
Guido van Rossumd7047b31995-01-02 19:07:15 +0000508int
Fred Drakeba096332000-07-09 07:04:36 +0000509PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000511 if (!PyString_Check(op))
512 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
516/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000517PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (!PyString_Check(op))
520 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522}
523
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000524int
525PyString_AsStringAndSize(register PyObject *obj,
526 register char **s,
527 register int *len)
528{
529 if (s == NULL) {
530 PyErr_BadInternalCall();
531 return -1;
532 }
533
534 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000536 if (PyUnicode_Check(obj)) {
537 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
538 if (obj == NULL)
539 return -1;
540 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000541 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000542#endif
543 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000544 PyErr_Format(PyExc_TypeError,
545 "expected string or Unicode object, "
546 "%.200s found", obj->ob_type->tp_name);
547 return -1;
548 }
549 }
550
551 *s = PyString_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyString_GET_SIZE(obj);
554 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected string without null bytes");
557 return -1;
558 }
559 return 0;
560}
561
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562/* Methods */
563
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564static int
Fred Drakeba096332000-07-09 07:04:36 +0000565string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566{
567 int i;
568 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000570
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000571 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000572 if (! PyString_CheckExact(op)) {
573 int ret;
574 /* A str subclass may have its own __str__ method. */
575 op = (PyStringObject *) PyObject_Str((PyObject *)op);
576 if (op == NULL)
577 return -1;
578 ret = string_print(op, fp, flags);
579 Py_DECREF(op);
580 return ret;
581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000582 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000584 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000586
Thomas Wouters7e474022000-07-16 12:04:32 +0000587 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000589 if (memchr(op->ob_sval, '\'', op->ob_size) &&
590 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 quote = '"';
592
593 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594 for (i = 0; i < op->ob_size; i++) {
595 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000596 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000598 else if (c == '\t')
599 fprintf(fp, "\\t");
600 else if (c == '\n')
601 fprintf(fp, "\\n");
602 else if (c == '\r')
603 fprintf(fp, "\\r");
604 else if (c < ' ' || c >= 0x7f)
605 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000606 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000607 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000609 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000610 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000611}
612
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000613static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000614string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000616 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
617 PyObject *v;
618 if (newsize > INT_MAX) {
619 PyErr_SetString(PyExc_OverflowError,
620 "string is too large to make repr");
621 }
622 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000624 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000625 }
626 else {
627 register int i;
628 register char c;
629 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000630 int quote;
631
Thomas Wouters7e474022000-07-16 12:04:32 +0000632 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000633 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000634 if (memchr(op->ob_sval, '\'', op->ob_size) &&
635 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000636 quote = '"';
637
Tim Peters9161c8b2001-12-03 01:55:38 +0000638 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000639 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000641 /* There's at least enough room for a hex escape
642 and a closing quote. */
643 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000645 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000647 else if (c == '\t')
648 *p++ = '\\', *p++ = 't';
649 else if (c == '\n')
650 *p++ = '\\', *p++ = 'n';
651 else if (c == '\r')
652 *p++ = '\\', *p++ = 'r';
653 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000654 /* For performance, we don't want to call
655 PyOS_snprintf here (extra layers of
656 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000657 sprintf(p, "\\x%02x", c & 0xff);
658 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000659 }
660 else
661 *p++ = c;
662 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000663 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000664 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000666 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000667 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000668 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670}
671
Guido van Rossum189f1df2001-05-01 16:51:53 +0000672static PyObject *
673string_str(PyObject *s)
674{
Tim Petersc9933152001-10-16 20:18:24 +0000675 assert(PyString_Check(s));
676 if (PyString_CheckExact(s)) {
677 Py_INCREF(s);
678 return s;
679 }
680 else {
681 /* Subtype -- return genuine string with the same value. */
682 PyStringObject *t = (PyStringObject *) s;
683 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
684 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000685}
686
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687static int
Fred Drakeba096332000-07-09 07:04:36 +0000688string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
690 return a->ob_size;
691}
692
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000694string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
696 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697 register PyStringObject *op;
698 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000699#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (PyUnicode_Check(bb))
701 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000702#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000703 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000704 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000705 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706 return NULL;
707 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000708#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000710 if ((a->ob_size == 0 || b->ob_size == 0) &&
711 PyString_CheckExact(a) && PyString_CheckExact(b)) {
712 if (a->ob_size == 0) {
713 Py_INCREF(bb);
714 return bb;
715 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716 Py_INCREF(a);
717 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718 }
719 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000720 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000722 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000723 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000724 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000725 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000726 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000727 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000728 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
729 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
730 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000731 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732#undef b
733}
734
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000736string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737{
738 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000739 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000740 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000741 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742 if (n < 0)
743 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000744 /* watch out for overflows: the size can overflow int,
745 * and the # of bytes needed can overflow size_t
746 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000748 if (n && size / n != a->ob_size) {
749 PyErr_SetString(PyExc_OverflowError,
750 "repeated string is too long");
751 return NULL;
752 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000753 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000754 Py_INCREF(a);
755 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756 }
Tim Peters8f422462000-09-09 06:13:41 +0000757 nbytes = size * sizeof(char);
758 if (nbytes / sizeof(char) != (size_t)size ||
759 nbytes + sizeof(PyStringObject) <= nbytes) {
760 PyErr_SetString(PyExc_OverflowError,
761 "repeated string is too long");
762 return NULL;
763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000764 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000765 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000766 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000768 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000769 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000770 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000771 for (i = 0; i < size; i += a->ob_size)
772 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
773 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000774 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000775}
776
777/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
778
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000779static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000780string_slice(register PyStringObject *a, register int i, register int j)
781 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782{
783 if (i < 0)
784 i = 0;
785 if (j < 0)
786 j = 0; /* Avoid signed/unsigned bug in next line */
787 if (j > a->ob_size)
788 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000789 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
790 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000791 Py_INCREF(a);
792 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
794 if (j < i)
795 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000796 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797}
798
Guido van Rossum9284a572000-03-07 15:53:43 +0000799static int
Fred Drakeba096332000-07-09 07:04:36 +0000800string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000801{
802 register char *s, *end;
803 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000804#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000805 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000806 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000807#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000808 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000809 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000810 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000811 return -1;
812 }
813 c = PyString_AsString(el)[0];
814 s = PyString_AsString(a);
815 end = s + PyString_Size(a);
816 while (s < end) {
817 if (c == *s++)
818 return 1;
819 }
820 return 0;
821}
822
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000824string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000826 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000827 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000829 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 return NULL;
831 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000832 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000833 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000834 if (v == NULL)
835 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000836 else {
837#ifdef COUNT_ALLOCS
838 one_strings++;
839#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000840 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000841 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000842 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843}
844
Martin v. Löwiscd353062001-05-24 16:56:35 +0000845static PyObject*
846string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000848 int c;
849 int len_a, len_b;
850 int min_len;
851 PyObject *result;
852
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000853 /* Make sure both arguments are strings. */
854 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000855 result = Py_NotImplemented;
856 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000857 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000858 if (a == b) {
859 switch (op) {
860 case Py_EQ:case Py_LE:case Py_GE:
861 result = Py_True;
862 goto out;
863 case Py_NE:case Py_LT:case Py_GT:
864 result = Py_False;
865 goto out;
866 }
867 }
868 if (op == Py_EQ) {
869 /* Supporting Py_NE here as well does not save
870 much time, since Py_NE is rarely used. */
871 if (a->ob_size == b->ob_size
872 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000873 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +0000874 a->ob_size) == 0)) {
875 result = Py_True;
876 } else {
877 result = Py_False;
878 }
879 goto out;
880 }
881 len_a = a->ob_size; len_b = b->ob_size;
882 min_len = (len_a < len_b) ? len_a : len_b;
883 if (min_len > 0) {
884 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
885 if (c==0)
886 c = memcmp(a->ob_sval, b->ob_sval, min_len);
887 }else
888 c = 0;
889 if (c == 0)
890 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
891 switch (op) {
892 case Py_LT: c = c < 0; break;
893 case Py_LE: c = c <= 0; break;
894 case Py_EQ: assert(0); break; /* unreachable */
895 case Py_NE: c = c != 0; break;
896 case Py_GT: c = c > 0; break;
897 case Py_GE: c = c >= 0; break;
898 default:
899 result = Py_NotImplemented;
900 goto out;
901 }
902 result = c ? Py_True : Py_False;
903 out:
904 Py_INCREF(result);
905 return result;
906}
907
908int
909_PyString_Eq(PyObject *o1, PyObject *o2)
910{
911 PyStringObject *a, *b;
912 a = (PyStringObject*)o1;
913 b = (PyStringObject*)o2;
914 return a->ob_size == b->ob_size
915 && *a->ob_sval == *b->ob_sval
916 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917}
918
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919static long
Fred Drakeba096332000-07-09 07:04:36 +0000920string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000921{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000922 register int len;
923 register unsigned char *p;
924 register long x;
925
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000926 if (a->ob_shash != -1)
927 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000928 if (a->ob_sinterned != NULL)
929 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000931 len = a->ob_size;
932 p = (unsigned char *) a->ob_sval;
933 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000935 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000936 x ^= a->ob_size;
937 if (x == -1)
938 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000939 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000940 return x;
941}
942
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000943static int
Fred Drakeba096332000-07-09 07:04:36 +0000944string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000945{
946 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000947 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000948 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000949 return -1;
950 }
951 *ptr = (void *)self->ob_sval;
952 return self->ob_size;
953}
954
955static int
Fred Drakeba096332000-07-09 07:04:36 +0000956string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000957{
Guido van Rossum045e6881997-09-08 18:30:11 +0000958 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000959 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000960 return -1;
961}
962
963static int
Fred Drakeba096332000-07-09 07:04:36 +0000964string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000965{
966 if ( lenp )
967 *lenp = self->ob_size;
968 return 1;
969}
970
Guido van Rossum1db70701998-10-08 02:18:52 +0000971static int
Fred Drakeba096332000-07-09 07:04:36 +0000972string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000973{
974 if ( index != 0 ) {
975 PyErr_SetString(PyExc_SystemError,
976 "accessing non-existent string segment");
977 return -1;
978 }
979 *ptr = self->ob_sval;
980 return self->ob_size;
981}
982
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000984 (inquiry)string_length, /*sq_length*/
985 (binaryfunc)string_concat, /*sq_concat*/
986 (intargfunc)string_repeat, /*sq_repeat*/
987 (intargfunc)string_item, /*sq_item*/
988 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000989 0, /*sq_ass_item*/
990 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000991 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992};
993
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000994static PyBufferProcs string_as_buffer = {
995 (getreadbufferproc)string_buffer_getreadbuf,
996 (getwritebufferproc)string_buffer_getwritebuf,
997 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000998 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000999};
1000
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001001
1002
/* Strip types; they double as indexes into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for a strip type: the format string minus its 3-character
 * "|O:" prefix.
 */
#define STRIPNAME(i) (&stripformat[i][3])
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001011
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001012
1013static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001014split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001015{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001016 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001017 PyObject* item;
1018 PyObject *list = PyList_New(0);
1019
1020 if (list == NULL)
1021 return NULL;
1022
Guido van Rossum4c08d552000-03-10 22:55:18 +00001023 for (i = j = 0; i < len; ) {
1024 while (i < len && isspace(Py_CHARMASK(s[i])))
1025 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001026 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001027 while (i < len && !isspace(Py_CHARMASK(s[i])))
1028 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001029 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001030 if (maxsplit-- <= 0)
1031 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001032 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1033 if (item == NULL)
1034 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001035 err = PyList_Append(list, item);
1036 Py_DECREF(item);
1037 if (err < 0)
1038 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001039 while (i < len && isspace(Py_CHARMASK(s[i])))
1040 i++;
1041 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001042 }
1043 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001044 if (j < len) {
1045 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1046 if (item == NULL)
1047 goto finally;
1048 err = PyList_Append(list, item);
1049 Py_DECREF(item);
1050 if (err < 0)
1051 goto finally;
1052 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001053 return list;
1054 finally:
1055 Py_DECREF(list);
1056 return NULL;
1057}
1058
1059
1060static char split__doc__[] =
1061"S.split([sep [,maxsplit]]) -> list of strings\n\
1062\n\
1063Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001064delimiter string. If maxsplit is given, at most maxsplit\n\
1065splits are done. If sep is not specified, any whitespace string\n\
1066is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001067
1068static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001069string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001070{
1071 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001072 int maxsplit = -1;
1073 const char *s = PyString_AS_STRING(self), *sub;
1074 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001075
Guido van Rossum4c08d552000-03-10 22:55:18 +00001076 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001077 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001078 if (maxsplit < 0)
1079 maxsplit = INT_MAX;
1080 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001081 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001082 if (PyString_Check(subobj)) {
1083 sub = PyString_AS_STRING(subobj);
1084 n = PyString_GET_SIZE(subobj);
1085 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001086#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087 else if (PyUnicode_Check(subobj))
1088 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001089#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001090 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1091 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001092 if (n == 0) {
1093 PyErr_SetString(PyExc_ValueError, "empty separator");
1094 return NULL;
1095 }
1096
1097 list = PyList_New(0);
1098 if (list == NULL)
1099 return NULL;
1100
1101 i = j = 0;
1102 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001103 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001104 if (maxsplit-- <= 0)
1105 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001106 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1107 if (item == NULL)
1108 goto fail;
1109 err = PyList_Append(list, item);
1110 Py_DECREF(item);
1111 if (err < 0)
1112 goto fail;
1113 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001114 }
1115 else
1116 i++;
1117 }
1118 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1119 if (item == NULL)
1120 goto fail;
1121 err = PyList_Append(list, item);
1122 Py_DECREF(item);
1123 if (err < 0)
1124 goto fail;
1125
1126 return list;
1127
1128 fail:
1129 Py_DECREF(list);
1130 return NULL;
1131}
1132
1133
1134static char join__doc__[] =
1135"S.join(sequence) -> string\n\
1136\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001137Return a string which is the concatenation of the strings in the\n\
1138sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001139
1140static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001141string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001142{
1143 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001144 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001145 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146 char *p;
1147 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001148 size_t sz = 0;
1149 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001150 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001151
Tim Peters19fe14e2001-01-19 03:03:47 +00001152 seq = PySequence_Fast(orig, "");
1153 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001154 if (PyErr_ExceptionMatches(PyExc_TypeError))
1155 PyErr_Format(PyExc_TypeError,
1156 "sequence expected, %.80s found",
1157 orig->ob_type->tp_name);
1158 return NULL;
1159 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001160
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001161 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001162 if (seqlen == 0) {
1163 Py_DECREF(seq);
1164 return PyString_FromString("");
1165 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001166 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001167 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001168 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1169 PyErr_Format(PyExc_TypeError,
1170 "sequence item 0: expected string,"
1171 " %.80s found",
1172 item->ob_type->tp_name);
1173 Py_DECREF(seq);
1174 return NULL;
1175 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001176 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001177 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001178 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001179 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001180
Tim Peters19fe14e2001-01-19 03:03:47 +00001181 /* There are at least two things to join. Do a pre-pass to figure out
1182 * the total amount of space we'll need (sz), see whether any argument
1183 * is absurd, and defer to the Unicode join if appropriate.
1184 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001185 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001186 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001187 item = PySequence_Fast_GET_ITEM(seq, i);
1188 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001189#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001190 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001191 /* Defer to Unicode join.
1192 * CAUTION: There's no gurantee that the
1193 * original sequence can be iterated over
1194 * again, so we must pass seq here.
1195 */
1196 PyObject *result;
1197 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001198 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001199 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001200 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001201#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001202 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001203 "sequence item %i: expected string,"
1204 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001205 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001206 Py_DECREF(seq);
1207 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001208 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001209 sz += PyString_GET_SIZE(item);
1210 if (i != 0)
1211 sz += seplen;
1212 if (sz < old_sz || sz > INT_MAX) {
1213 PyErr_SetString(PyExc_OverflowError,
1214 "join() is too long for a Python string");
1215 Py_DECREF(seq);
1216 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001217 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001218 }
1219
1220 /* Allocate result space. */
1221 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1222 if (res == NULL) {
1223 Py_DECREF(seq);
1224 return NULL;
1225 }
1226
1227 /* Catenate everything. */
1228 p = PyString_AS_STRING(res);
1229 for (i = 0; i < seqlen; ++i) {
1230 size_t n;
1231 item = PySequence_Fast_GET_ITEM(seq, i);
1232 n = PyString_GET_SIZE(item);
1233 memcpy(p, PyString_AS_STRING(item), n);
1234 p += n;
1235 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001236 memcpy(p, sep, seplen);
1237 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001238 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001239 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001240
Jeremy Hylton49048292000-07-11 03:28:17 +00001241 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001243}
1244
Tim Peters52e155e2001-06-16 05:42:57 +00001245PyObject *
1246_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001247{
Tim Petersa7259592001-06-16 05:11:17 +00001248 assert(sep != NULL && PyString_Check(sep));
1249 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001250 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001251}
1252
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001253static long
Fred Drakeba096332000-07-09 07:04:36 +00001254string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001255{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001256 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001257 int len = PyString_GET_SIZE(self);
1258 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001259 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001261 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001262 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001263 return -2;
1264 if (PyString_Check(subobj)) {
1265 sub = PyString_AS_STRING(subobj);
1266 n = PyString_GET_SIZE(subobj);
1267 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001268#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001269 else if (PyUnicode_Check(subobj))
1270 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001271#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001272 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001273 return -2;
1274
1275 if (last > len)
1276 last = len;
1277 if (last < 0)
1278 last += len;
1279 if (last < 0)
1280 last = 0;
1281 if (i < 0)
1282 i += len;
1283 if (i < 0)
1284 i = 0;
1285
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286 if (dir > 0) {
1287 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001289 last -= n;
1290 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001291 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292 return (long)i;
1293 }
1294 else {
1295 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001296
Guido van Rossum4c08d552000-03-10 22:55:18 +00001297 if (n == 0 && i <= last)
1298 return (long)last;
1299 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001300 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301 return (long)j;
1302 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001303
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304 return -1;
1305}
1306
1307
1308static char find__doc__[] =
1309"S.find(sub [,start [,end]]) -> int\n\
1310\n\
1311Return the lowest index in S where substring sub is found,\n\
1312such that sub is contained within s[start,end]. Optional\n\
1313arguments start and end are interpreted as in slice notation.\n\
1314\n\
1315Return -1 on failure.";
1316
1317static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001318string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001320 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321 if (result == -2)
1322 return NULL;
1323 return PyInt_FromLong(result);
1324}
1325
1326
1327static char index__doc__[] =
1328"S.index(sub [,start [,end]]) -> int\n\
1329\n\
1330Like S.find() but raise ValueError when the substring is not found.";
1331
1332static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001333string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336 if (result == -2)
1337 return NULL;
1338 if (result == -1) {
1339 PyErr_SetString(PyExc_ValueError,
1340 "substring not found in string.index");
1341 return NULL;
1342 }
1343 return PyInt_FromLong(result);
1344}
1345
1346
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347static char rfind__doc__[] =
1348"S.rfind(sub [,start [,end]]) -> int\n\
1349\n\
1350Return the highest index in S where substring sub is found,\n\
1351such that sub is contained within s[start,end]. Optional\n\
1352arguments start and end are interpreted as in slice notation.\n\
1353\n\
1354Return -1 on failure.";
1355
1356static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001357string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360 if (result == -2)
1361 return NULL;
1362 return PyInt_FromLong(result);
1363}
1364
1365
1366static char rindex__doc__[] =
1367"S.rindex(sub [,start [,end]]) -> int\n\
1368\n\
1369Like S.rfind() but raise ValueError when the substring is not found.";
1370
1371static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001372string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001373{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001374 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375 if (result == -2)
1376 return NULL;
1377 if (result == -1) {
1378 PyErr_SetString(PyExc_ValueError,
1379 "substring not found in string.rindex");
1380 return NULL;
1381 }
1382 return PyInt_FromLong(result);
1383}
1384
1385
1386static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001387do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1388{
1389 char *s = PyString_AS_STRING(self);
1390 int len = PyString_GET_SIZE(self);
1391 char *sep = PyString_AS_STRING(sepobj);
1392 int seplen = PyString_GET_SIZE(sepobj);
1393 int i, j;
1394
1395 i = 0;
1396 if (striptype != RIGHTSTRIP) {
1397 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1398 i++;
1399 }
1400 }
1401
1402 j = len;
1403 if (striptype != LEFTSTRIP) {
1404 do {
1405 j--;
1406 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1407 j++;
1408 }
1409
1410 if (i == 0 && j == len && PyString_CheckExact(self)) {
1411 Py_INCREF(self);
1412 return (PyObject*)self;
1413 }
1414 else
1415 return PyString_FromStringAndSize(s+i, j-i);
1416}
1417
1418
1419static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001420do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421{
1422 char *s = PyString_AS_STRING(self);
1423 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 i = 0;
1426 if (striptype != RIGHTSTRIP) {
1427 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1428 i++;
1429 }
1430 }
1431
1432 j = len;
1433 if (striptype != LEFTSTRIP) {
1434 do {
1435 j--;
1436 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1437 j++;
1438 }
1439
Tim Peters8fa5dd02001-09-12 02:18:30 +00001440 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441 Py_INCREF(self);
1442 return (PyObject*)self;
1443 }
1444 else
1445 return PyString_FromStringAndSize(s+i, j-i);
1446}
1447
1448
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001449static PyObject *
1450do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1451{
1452 PyObject *sep = NULL;
1453
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001454 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001455 return NULL;
1456
1457 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001458 if (PyString_Check(sep))
1459 return do_xstrip(self, striptype, sep);
Walter Dörwald775c11f2002-05-13 09:00:41 +00001460#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001461 else if (PyUnicode_Check(sep)) {
1462 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1463 PyObject *res;
1464 if (uniself==NULL)
1465 return NULL;
1466 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1467 striptype, sep);
1468 Py_DECREF(uniself);
1469 return res;
1470 }
Walter Dörwald775c11f2002-05-13 09:00:41 +00001471#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001472 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001473 PyErr_Format(PyExc_TypeError,
Walter Dörwald775c11f2002-05-13 09:00:41 +00001474#ifdef Py_USING_UNICODE
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001475 "%s arg must be None, str or unicode",
Walter Dörwald775c11f2002-05-13 09:00:41 +00001476#else
1477 "%s arg must be None or str",
1478#endif
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001479 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001480 return NULL;
1481 }
1482 return do_xstrip(self, striptype, sep);
1483 }
1484
1485 return do_strip(self, striptype);
1486}
1487
1488
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489static char strip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001490"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491\n\
1492Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001493whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001494If sep is given and not None, remove characters in sep instead.\n\
1495If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496
1497static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001498string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001500 if (PyTuple_GET_SIZE(args) == 0)
1501 return do_strip(self, BOTHSTRIP); /* Common case */
1502 else
1503 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504}
1505
1506
1507static char lstrip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001508"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001510Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001511If sep is given and not None, remove characters in sep instead.\n\
1512If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001513
1514static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001515string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001517 if (PyTuple_GET_SIZE(args) == 0)
1518 return do_strip(self, LEFTSTRIP); /* Common case */
1519 else
1520 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001521}
1522
1523
1524static char rstrip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001525"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001527Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001528If sep is given and not None, remove characters in sep instead.\n\
1529If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001530
1531static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001532string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001534 if (PyTuple_GET_SIZE(args) == 0)
1535 return do_strip(self, RIGHTSTRIP); /* Common case */
1536 else
1537 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001538}
1539
1540
1541static char lower__doc__[] =
1542"S.lower() -> string\n\
1543\n\
1544Return a copy of the string S converted to lowercase.";
1545
1546static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001547string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001548{
1549 char *s = PyString_AS_STRING(self), *s_new;
1550 int i, n = PyString_GET_SIZE(self);
1551 PyObject *new;
1552
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001553 new = PyString_FromStringAndSize(NULL, n);
1554 if (new == NULL)
1555 return NULL;
1556 s_new = PyString_AsString(new);
1557 for (i = 0; i < n; i++) {
1558 int c = Py_CHARMASK(*s++);
1559 if (isupper(c)) {
1560 *s_new = tolower(c);
1561 } else
1562 *s_new = c;
1563 s_new++;
1564 }
1565 return new;
1566}
1567
1568
1569static char upper__doc__[] =
1570"S.upper() -> string\n\
1571\n\
1572Return a copy of the string S converted to uppercase.";
1573
1574static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001575string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001576{
1577 char *s = PyString_AS_STRING(self), *s_new;
1578 int i, n = PyString_GET_SIZE(self);
1579 PyObject *new;
1580
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001581 new = PyString_FromStringAndSize(NULL, n);
1582 if (new == NULL)
1583 return NULL;
1584 s_new = PyString_AsString(new);
1585 for (i = 0; i < n; i++) {
1586 int c = Py_CHARMASK(*s++);
1587 if (islower(c)) {
1588 *s_new = toupper(c);
1589 } else
1590 *s_new = c;
1591 s_new++;
1592 }
1593 return new;
1594}
1595
1596
Guido van Rossum4c08d552000-03-10 22:55:18 +00001597static char title__doc__[] =
1598"S.title() -> string\n\
1599\n\
1600Return a titlecased version of S, i.e. words start with uppercase\n\
1601characters, all remaining cased characters have lowercase.";
1602
1603static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001604string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001605{
1606 char *s = PyString_AS_STRING(self), *s_new;
1607 int i, n = PyString_GET_SIZE(self);
1608 int previous_is_cased = 0;
1609 PyObject *new;
1610
Guido van Rossum4c08d552000-03-10 22:55:18 +00001611 new = PyString_FromStringAndSize(NULL, n);
1612 if (new == NULL)
1613 return NULL;
1614 s_new = PyString_AsString(new);
1615 for (i = 0; i < n; i++) {
1616 int c = Py_CHARMASK(*s++);
1617 if (islower(c)) {
1618 if (!previous_is_cased)
1619 c = toupper(c);
1620 previous_is_cased = 1;
1621 } else if (isupper(c)) {
1622 if (previous_is_cased)
1623 c = tolower(c);
1624 previous_is_cased = 1;
1625 } else
1626 previous_is_cased = 0;
1627 *s_new++ = c;
1628 }
1629 return new;
1630}
1631
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001632static char capitalize__doc__[] =
1633"S.capitalize() -> string\n\
1634\n\
1635Return a copy of the string S with only its first character\n\
1636capitalized.";
1637
1638static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001639string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001640{
1641 char *s = PyString_AS_STRING(self), *s_new;
1642 int i, n = PyString_GET_SIZE(self);
1643 PyObject *new;
1644
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001645 new = PyString_FromStringAndSize(NULL, n);
1646 if (new == NULL)
1647 return NULL;
1648 s_new = PyString_AsString(new);
1649 if (0 < n) {
1650 int c = Py_CHARMASK(*s++);
1651 if (islower(c))
1652 *s_new = toupper(c);
1653 else
1654 *s_new = c;
1655 s_new++;
1656 }
1657 for (i = 1; i < n; i++) {
1658 int c = Py_CHARMASK(*s++);
1659 if (isupper(c))
1660 *s_new = tolower(c);
1661 else
1662 *s_new = c;
1663 s_new++;
1664 }
1665 return new;
1666}
1667
1668
1669static char count__doc__[] =
1670"S.count(sub[, start[, end]]) -> int\n\
1671\n\
1672Return the number of occurrences of substring sub in string\n\
1673S[start:end]. Optional arguments start and end are\n\
1674interpreted as in slice notation.";
1675
1676static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001677string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001680 int len = PyString_GET_SIZE(self), n;
1681 int i = 0, last = INT_MAX;
1682 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684
Guido van Rossumc6821402000-05-08 14:08:05 +00001685 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1686 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001688
Guido van Rossum4c08d552000-03-10 22:55:18 +00001689 if (PyString_Check(subobj)) {
1690 sub = PyString_AS_STRING(subobj);
1691 n = PyString_GET_SIZE(subobj);
1692 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001693#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001694 else if (PyUnicode_Check(subobj)) {
1695 int count;
1696 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1697 if (count == -1)
1698 return NULL;
1699 else
1700 return PyInt_FromLong((long) count);
1701 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001702#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001703 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1704 return NULL;
1705
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001706 if (last > len)
1707 last = len;
1708 if (last < 0)
1709 last += len;
1710 if (last < 0)
1711 last = 0;
1712 if (i < 0)
1713 i += len;
1714 if (i < 0)
1715 i = 0;
1716 m = last + 1 - n;
1717 if (n == 0)
1718 return PyInt_FromLong((long) (m-i));
1719
1720 r = 0;
1721 while (i < m) {
1722 if (!memcmp(s+i, sub, n)) {
1723 r++;
1724 i += n;
1725 } else {
1726 i++;
1727 }
1728 }
1729 return PyInt_FromLong((long) r);
1730}
1731
1732
1733static char swapcase__doc__[] =
1734"S.swapcase() -> string\n\
1735\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001736Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737converted to lowercase and vice versa.";
1738
1739static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001740string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741{
1742 char *s = PyString_AS_STRING(self), *s_new;
1743 int i, n = PyString_GET_SIZE(self);
1744 PyObject *new;
1745
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001746 new = PyString_FromStringAndSize(NULL, n);
1747 if (new == NULL)
1748 return NULL;
1749 s_new = PyString_AsString(new);
1750 for (i = 0; i < n; i++) {
1751 int c = Py_CHARMASK(*s++);
1752 if (islower(c)) {
1753 *s_new = toupper(c);
1754 }
1755 else if (isupper(c)) {
1756 *s_new = tolower(c);
1757 }
1758 else
1759 *s_new = c;
1760 s_new++;
1761 }
1762 return new;
1763}
1764
1765
1766static char translate__doc__[] =
1767"S.translate(table [,deletechars]) -> string\n\
1768\n\
1769Return a copy of the string S, where all characters occurring\n\
1770in the optional argument deletechars are removed, and the\n\
1771remaining characters have been mapped through the given\n\
1772translation table, which must be a string of length 256.";
1773
1774static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001775string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001777 register char *input, *output;
1778 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779 register int i, c, changed = 0;
1780 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001781 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001782 int inlen, tablen, dellen = 0;
1783 PyObject *result;
1784 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001785 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001786
Guido van Rossum4c08d552000-03-10 22:55:18 +00001787 if (!PyArg_ParseTuple(args, "O|O:translate",
1788 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001790
1791 if (PyString_Check(tableobj)) {
1792 table1 = PyString_AS_STRING(tableobj);
1793 tablen = PyString_GET_SIZE(tableobj);
1794 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001795#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001796 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001797 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001798 parameter; instead a mapping to None will cause characters
1799 to be deleted. */
1800 if (delobj != NULL) {
1801 PyErr_SetString(PyExc_TypeError,
1802 "deletions are implemented differently for unicode");
1803 return NULL;
1804 }
1805 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1806 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001807#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001808 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001810
1811 if (delobj != NULL) {
1812 if (PyString_Check(delobj)) {
1813 del_table = PyString_AS_STRING(delobj);
1814 dellen = PyString_GET_SIZE(delobj);
1815 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001816#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001817 else if (PyUnicode_Check(delobj)) {
1818 PyErr_SetString(PyExc_TypeError,
1819 "deletions are implemented differently for unicode");
1820 return NULL;
1821 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001822#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001823 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1824 return NULL;
1825
1826 if (tablen != 256) {
1827 PyErr_SetString(PyExc_ValueError,
1828 "translation table must be 256 characters long");
1829 return NULL;
1830 }
1831 }
1832 else {
1833 del_table = NULL;
1834 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001835 }
1836
1837 table = table1;
1838 inlen = PyString_Size(input_obj);
1839 result = PyString_FromStringAndSize((char *)NULL, inlen);
1840 if (result == NULL)
1841 return NULL;
1842 output_start = output = PyString_AsString(result);
1843 input = PyString_AsString(input_obj);
1844
1845 if (dellen == 0) {
1846 /* If no deletions are required, use faster code */
1847 for (i = inlen; --i >= 0; ) {
1848 c = Py_CHARMASK(*input++);
1849 if (Py_CHARMASK((*output++ = table[c])) != c)
1850 changed = 1;
1851 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001852 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001853 return result;
1854 Py_DECREF(result);
1855 Py_INCREF(input_obj);
1856 return input_obj;
1857 }
1858
1859 for (i = 0; i < 256; i++)
1860 trans_table[i] = Py_CHARMASK(table[i]);
1861
1862 for (i = 0; i < dellen; i++)
1863 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1864
1865 for (i = inlen; --i >= 0; ) {
1866 c = Py_CHARMASK(*input++);
1867 if (trans_table[c] != -1)
1868 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1869 continue;
1870 changed = 1;
1871 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001872 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001873 Py_DECREF(result);
1874 Py_INCREF(input_obj);
1875 return input_obj;
1876 }
1877 /* Fix the size of the resulting string */
Tim Peters5de98422002-04-27 18:44:32 +00001878 if (inlen > 0)
1879 _PyString_Resize(&result, output - output_start);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001880 return result;
1881}
1882
1883
1884/* What follows is used for implementing replace(). Perry Stoll. */
1885
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the
  PAT_LEN bytes at PAT.  Returns the index into MEM of that
  occurrence, or -1 if there is none.  If PAT is longer than MEM
  the function returns -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* the pattern cannot start in the last pat_len-1 bytes, so this
	   is the highest index worth testing */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1911
/*
   mymemcnt

   Return the number of non-overlapping occurrences of PAT in MEM,
   meaning mem=1111 and pat==11 returns 2.
           mem=11111 and pat==11 also return 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		int at = mymemfind(mem, len, pat, pat_len);
		if (at == -1)
			break;
		/* continue searching just past this occurrence */
		mem += at + pat_len;
		len -= at + pat_len;
	}
	return hits;
}
1935
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT replacements are made; a negative
   COUNT means "no limit".

   If STR is empty, if PAT is longer than STR, or if there are no
   occurrences of PAT in STR (or COUNT is 0), then the original string
   is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would be
       longer than INT_MAX bytes).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Only a growing replacement can overflow: when sub is not longer
	   than pat the result is never longer than the input.  Refuse a
	   result whose length does not fit in an int rather than letting
	   the multiplication wrap and under-allocate the buffer. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
2020
2021
2022static char replace__doc__[] =
2023"S.replace (old, new[, maxsplit]) -> string\n\
2024\n\
2025Return a copy of string S with all occurrences of substring\n\
2026old replaced by new. If the optional argument maxsplit is\n\
2027given, only the first maxsplit occurrences are replaced.";
2028
2029static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002030string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002032 const char *str = PyString_AS_STRING(self), *sub, *repl;
2033 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002034 const int len = PyString_GET_SIZE(self);
2035 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002036 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002037 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002038 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002039
Guido van Rossum4c08d552000-03-10 22:55:18 +00002040 if (!PyArg_ParseTuple(args, "OO|i:replace",
2041 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002042 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043
2044 if (PyString_Check(subobj)) {
2045 sub = PyString_AS_STRING(subobj);
2046 sub_len = PyString_GET_SIZE(subobj);
2047 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002048#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002049 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002050 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002051 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002052#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002053 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2054 return NULL;
2055
2056 if (PyString_Check(replobj)) {
2057 repl = PyString_AS_STRING(replobj);
2058 repl_len = PyString_GET_SIZE(replobj);
2059 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002060#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002061 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002062 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002063 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002064#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002065 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2066 return NULL;
2067
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002068 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002069 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070 return NULL;
2071 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002072 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002073 if (new_s == NULL) {
2074 PyErr_NoMemory();
2075 return NULL;
2076 }
2077 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002078 if (PyString_CheckExact(self)) {
2079 /* we're returning another reference to self */
2080 new = (PyObject*)self;
2081 Py_INCREF(new);
2082 }
2083 else {
2084 new = PyString_FromStringAndSize(str, len);
2085 if (new == NULL)
2086 return NULL;
2087 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088 }
2089 else {
2090 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002091 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092 }
2093 return new;
2094}
2095
2096
2097static char startswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002098"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002099\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002100Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002101optional start, test S beginning at that position. With optional end, stop\n\
2102comparing S at that position.";
2103
2104static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002105string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002106{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002109 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002110 int plen;
2111 int start = 0;
2112 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002113 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002114
Guido van Rossumc6821402000-05-08 14:08:05 +00002115 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2116 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002117 return NULL;
2118 if (PyString_Check(subobj)) {
2119 prefix = PyString_AS_STRING(subobj);
2120 plen = PyString_GET_SIZE(subobj);
2121 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002122#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002123 else if (PyUnicode_Check(subobj)) {
2124 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002125 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002126 subobj, start, end, -1);
2127 if (rc == -1)
2128 return NULL;
2129 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002130 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002131 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002132#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002133 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002134 return NULL;
2135
2136 /* adopt Java semantics for index out of range. it is legal for
2137 * offset to be == plen, but this only returns true if prefix is
2138 * the empty string.
2139 */
2140 if (start < 0 || start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002141 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002142
2143 if (!memcmp(str+start, prefix, plen)) {
2144 /* did the match end after the specified end? */
2145 if (end < 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002146 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147 else if (end - start < plen)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002148 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002149 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002150 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002151 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002152 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002153}
2154
2155
2156static char endswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002157"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002158\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002159Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002160optional start, test S beginning at that position. With optional end, stop\n\
2161comparing S at that position.";
2162
2163static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002164string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002165{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002166 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002167 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002168 const char* suffix;
2169 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002170 int start = 0;
2171 int end = -1;
2172 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002173 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002174
Guido van Rossumc6821402000-05-08 14:08:05 +00002175 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2176 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002177 return NULL;
2178 if (PyString_Check(subobj)) {
2179 suffix = PyString_AS_STRING(subobj);
2180 slen = PyString_GET_SIZE(subobj);
2181 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002182#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002183 else if (PyUnicode_Check(subobj)) {
2184 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002185 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002186 subobj, start, end, +1);
2187 if (rc == -1)
2188 return NULL;
2189 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002190 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002191 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002192#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002193 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002194 return NULL;
2195
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196 if (start < 0 || start > len || slen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002197 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002198
2199 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002200 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002201
Guido van Rossum4c08d552000-03-10 22:55:18 +00002202 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002203 return PyBool_FromLong(1);
2204 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002205}
2206
2207
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002208static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002209"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002210\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002211Encodes S using the codec registered for encoding. encoding defaults\n\
2212to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002213handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2214a ValueError. Other possible values are 'ignore' and 'replace'.";
2215
2216static PyObject *
2217string_encode(PyStringObject *self, PyObject *args)
2218{
2219 char *encoding = NULL;
2220 char *errors = NULL;
2221 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2222 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002223 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2224}
2225
2226
2227static char decode__doc__[] =
2228"S.decode([encoding[,errors]]) -> object\n\
2229\n\
2230Decodes S using the codec registered for encoding. encoding defaults\n\
2231to the default encoding. errors may be given to set a different error\n\
2232handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2233a ValueError. Other possible values are 'ignore' and 'replace'.";
2234
2235static PyObject *
2236string_decode(PyStringObject *self, PyObject *args)
2237{
2238 char *encoding = NULL;
2239 char *errors = NULL;
2240 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2241 return NULL;
2242 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002243}
2244
2245
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246static char expandtabs__doc__[] =
2247"S.expandtabs([tabsize]) -> string\n\
2248\n\
2249Return a copy of S where all tab characters are expanded using spaces.\n\
2250If tabsize is not given, a tab size of 8 characters is assumed.";
2251
2252static PyObject*
2253string_expandtabs(PyStringObject *self, PyObject *args)
2254{
2255 const char *e, *p;
2256 char *q;
2257 int i, j;
2258 PyObject *u;
2259 int tabsize = 8;
2260
2261 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2262 return NULL;
2263
Thomas Wouters7e474022000-07-16 12:04:32 +00002264 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265 i = j = 0;
2266 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2267 for (p = PyString_AS_STRING(self); p < e; p++)
2268 if (*p == '\t') {
2269 if (tabsize > 0)
2270 j += tabsize - (j % tabsize);
2271 }
2272 else {
2273 j++;
2274 if (*p == '\n' || *p == '\r') {
2275 i += j;
2276 j = 0;
2277 }
2278 }
2279
2280 /* Second pass: create output string and fill it */
2281 u = PyString_FromStringAndSize(NULL, i + j);
2282 if (!u)
2283 return NULL;
2284
2285 j = 0;
2286 q = PyString_AS_STRING(u);
2287
2288 for (p = PyString_AS_STRING(self); p < e; p++)
2289 if (*p == '\t') {
2290 if (tabsize > 0) {
2291 i = tabsize - (j % tabsize);
2292 j += i;
2293 while (i--)
2294 *q++ = ' ';
2295 }
2296 }
2297 else {
2298 j++;
2299 *q++ = *p;
2300 if (*p == '\n' || *p == '\r')
2301 j = 0;
2302 }
2303
2304 return u;
2305}
2306
Tim Peters8fa5dd02001-09-12 02:18:30 +00002307static PyObject *
2308pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309{
2310 PyObject *u;
2311
2312 if (left < 0)
2313 left = 0;
2314 if (right < 0)
2315 right = 0;
2316
Tim Peters8fa5dd02001-09-12 02:18:30 +00002317 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002318 Py_INCREF(self);
2319 return (PyObject *)self;
2320 }
2321
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002322 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002323 left + PyString_GET_SIZE(self) + right);
2324 if (u) {
2325 if (left)
2326 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002327 memcpy(PyString_AS_STRING(u) + left,
2328 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002329 PyString_GET_SIZE(self));
2330 if (right)
2331 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2332 fill, right);
2333 }
2334
2335 return u;
2336}
2337
2338static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002339"S.ljust(width) -> string\n"
2340"\n"
2341"Return S left justified in a string of length width. Padding is\n"
2342"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002343
2344static PyObject *
2345string_ljust(PyStringObject *self, PyObject *args)
2346{
2347 int width;
2348 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2349 return NULL;
2350
Tim Peters8fa5dd02001-09-12 02:18:30 +00002351 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 Py_INCREF(self);
2353 return (PyObject*) self;
2354 }
2355
2356 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2357}
2358
2359
2360static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002361"S.rjust(width) -> string\n"
2362"\n"
2363"Return S right justified in a string of length width. Padding is\n"
2364"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365
2366static PyObject *
2367string_rjust(PyStringObject *self, PyObject *args)
2368{
2369 int width;
2370 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2371 return NULL;
2372
Tim Peters8fa5dd02001-09-12 02:18:30 +00002373 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 Py_INCREF(self);
2375 return (PyObject*) self;
2376 }
2377
2378 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2379}
2380
2381
2382static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002383"S.center(width) -> string\n"
2384"\n"
2385"Return S centered in a string of length width. Padding is done\n"
2386"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387
2388static PyObject *
2389string_center(PyStringObject *self, PyObject *args)
2390{
2391 int marg, left;
2392 int width;
2393
2394 if (!PyArg_ParseTuple(args, "i:center", &width))
2395 return NULL;
2396
Tim Peters8fa5dd02001-09-12 02:18:30 +00002397 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002398 Py_INCREF(self);
2399 return (PyObject*) self;
2400 }
2401
2402 marg = width - PyString_GET_SIZE(self);
2403 left = marg / 2 + (marg & width & 1);
2404
2405 return pad(self, left, marg - left, ' ');
2406}
2407
Walter Dörwald068325e2002-04-15 13:36:47 +00002408static char zfill__doc__[] =
2409"S.zfill(width) -> string\n"
2410"\n"
2411"Pad a numeric string S with zeros on the left, to fill a field\n"
2412"of the specified width. The string S is never truncated.";
2413
2414static PyObject *
2415string_zfill(PyStringObject *self, PyObject *args)
2416{
2417 int fill;
2418 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002419 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002420
2421 int width;
2422 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2423 return NULL;
2424
2425 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002426 if (PyString_CheckExact(self)) {
2427 Py_INCREF(self);
2428 return (PyObject*) self;
2429 }
2430 else
2431 return PyString_FromStringAndSize(
2432 PyString_AS_STRING(self),
2433 PyString_GET_SIZE(self)
2434 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002435 }
2436
2437 fill = width - PyString_GET_SIZE(self);
2438
2439 s = pad(self, fill, 0, '0');
2440
2441 if (s == NULL)
2442 return NULL;
2443
2444 p = PyString_AS_STRING(s);
2445 if (p[fill] == '+' || p[fill] == '-') {
2446 /* move sign to beginning of string */
2447 p[0] = p[fill];
2448 p[fill] = '0';
2449 }
2450
2451 return (PyObject*) s;
2452}
2453
Guido van Rossum4c08d552000-03-10 22:55:18 +00002454static char isspace__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002455"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002456"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002457"Return True if there are only whitespace characters in S,\n"
2458"False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002459
2460static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002461string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002462{
Fred Drakeba096332000-07-09 07:04:36 +00002463 register const unsigned char *p
2464 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002465 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002466
Guido van Rossum4c08d552000-03-10 22:55:18 +00002467 /* Shortcut for single character strings */
2468 if (PyString_GET_SIZE(self) == 1 &&
2469 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002470 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002471
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002472 /* Special case for empty strings */
2473 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002474 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002475
Guido van Rossum4c08d552000-03-10 22:55:18 +00002476 e = p + PyString_GET_SIZE(self);
2477 for (; p < e; p++) {
2478 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002479 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002480 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002481 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002482}
2483
2484
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002485static char isalpha__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002486"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002487\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002488Return True if all characters in S are alphabetic\n\
2489and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002490
2491static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002492string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002493{
Fred Drakeba096332000-07-09 07:04:36 +00002494 register const unsigned char *p
2495 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002496 register const unsigned char *e;
2497
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002498 /* Shortcut for single character strings */
2499 if (PyString_GET_SIZE(self) == 1 &&
2500 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002501 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002502
2503 /* Special case for empty strings */
2504 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002505 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002506
2507 e = p + PyString_GET_SIZE(self);
2508 for (; p < e; p++) {
2509 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002510 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002511 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002512 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002513}
2514
2515
2516static char isalnum__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002517"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002518\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002519Return True if all characters in S are alphanumeric\n\
2520and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002521
2522static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002523string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002524{
Fred Drakeba096332000-07-09 07:04:36 +00002525 register const unsigned char *p
2526 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002527 register const unsigned char *e;
2528
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002529 /* Shortcut for single character strings */
2530 if (PyString_GET_SIZE(self) == 1 &&
2531 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002532 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002533
2534 /* Special case for empty strings */
2535 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002536 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002537
2538 e = p + PyString_GET_SIZE(self);
2539 for (; p < e; p++) {
2540 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002541 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002542 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002543 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002544}
2545
2546
Guido van Rossum4c08d552000-03-10 22:55:18 +00002547static char isdigit__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002548"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002549\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002550Return True if there are only digit characters in S,\n\
2551False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002552
2553static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002554string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002555{
Fred Drakeba096332000-07-09 07:04:36 +00002556 register const unsigned char *p
2557 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002558 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002559
Guido van Rossum4c08d552000-03-10 22:55:18 +00002560 /* Shortcut for single character strings */
2561 if (PyString_GET_SIZE(self) == 1 &&
2562 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002563 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002564
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002565 /* Special case for empty strings */
2566 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002567 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002568
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569 e = p + PyString_GET_SIZE(self);
2570 for (; p < e; p++) {
2571 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002572 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002574 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002575}
2576
2577
2578static char islower__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002579"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002581Return True if all cased characters in S are lowercase and there is\n\
2582at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002583
2584static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002585string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586{
Fred Drakeba096332000-07-09 07:04:36 +00002587 register const unsigned char *p
2588 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002589 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002590 int cased;
2591
Guido van Rossum4c08d552000-03-10 22:55:18 +00002592 /* Shortcut for single character strings */
2593 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002594 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002595
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002596 /* Special case for empty strings */
2597 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002598 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002599
Guido van Rossum4c08d552000-03-10 22:55:18 +00002600 e = p + PyString_GET_SIZE(self);
2601 cased = 0;
2602 for (; p < e; p++) {
2603 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002604 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002605 else if (!cased && islower(*p))
2606 cased = 1;
2607 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002608 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002609}
2610
2611
2612static char isupper__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002613"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002615Return True if all cased characters in S are uppercase and there is\n\
2616at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617
2618static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002619string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620{
Fred Drakeba096332000-07-09 07:04:36 +00002621 register const unsigned char *p
2622 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002623 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002624 int cased;
2625
Guido van Rossum4c08d552000-03-10 22:55:18 +00002626 /* Shortcut for single character strings */
2627 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002628 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002629
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002630 /* Special case for empty strings */
2631 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002632 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002633
Guido van Rossum4c08d552000-03-10 22:55:18 +00002634 e = p + PyString_GET_SIZE(self);
2635 cased = 0;
2636 for (; p < e; p++) {
2637 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002638 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002639 else if (!cased && isupper(*p))
2640 cased = 1;
2641 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002642 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002643}
2644
2645
2646static char istitle__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002647"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002648\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002649Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002650may only follow uncased characters and lowercase characters only cased\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002651ones. Return False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002652
2653static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002654string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655{
Fred Drakeba096332000-07-09 07:04:36 +00002656 register const unsigned char *p
2657 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002658 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002659 int cased, previous_is_cased;
2660
Guido van Rossum4c08d552000-03-10 22:55:18 +00002661 /* Shortcut for single character strings */
2662 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002663 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002664
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002665 /* Special case for empty strings */
2666 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002667 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002668
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669 e = p + PyString_GET_SIZE(self);
2670 cased = 0;
2671 previous_is_cased = 0;
2672 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002673 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002674
2675 if (isupper(ch)) {
2676 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002677 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002678 previous_is_cased = 1;
2679 cased = 1;
2680 }
2681 else if (islower(ch)) {
2682 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002683 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002684 previous_is_cased = 1;
2685 cased = 1;
2686 }
2687 else
2688 previous_is_cased = 0;
2689 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002690 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002691}
2692
2693
2694static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002695"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002696\n\
2697Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002698Line breaks are not included in the resulting list unless keepends\n\
2699is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002700
2701#define SPLIT_APPEND(data, left, right) \
2702 str = PyString_FromStringAndSize(data + left, right - left); \
2703 if (!str) \
2704 goto onError; \
2705 if (PyList_Append(list, str)) { \
2706 Py_DECREF(str); \
2707 goto onError; \
2708 } \
2709 else \
2710 Py_DECREF(str);
2711
2712static PyObject*
2713string_splitlines(PyStringObject *self, PyObject *args)
2714{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002715 register int i;
2716 register int j;
2717 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002718 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002719 PyObject *list;
2720 PyObject *str;
2721 char *data;
2722
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002723 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002724 return NULL;
2725
2726 data = PyString_AS_STRING(self);
2727 len = PyString_GET_SIZE(self);
2728
Guido van Rossum4c08d552000-03-10 22:55:18 +00002729 list = PyList_New(0);
2730 if (!list)
2731 goto onError;
2732
2733 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002734 int eol;
2735
Guido van Rossum4c08d552000-03-10 22:55:18 +00002736 /* Find a line and append it */
2737 while (i < len && data[i] != '\n' && data[i] != '\r')
2738 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002739
2740 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002741 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002742 if (i < len) {
2743 if (data[i] == '\r' && i + 1 < len &&
2744 data[i+1] == '\n')
2745 i += 2;
2746 else
2747 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002748 if (keepends)
2749 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002750 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002751 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002752 j = i;
2753 }
2754 if (j < len) {
2755 SPLIT_APPEND(data, j, len);
2756 }
2757
2758 return list;
2759
2760 onError:
2761 Py_DECREF(list);
2762 return NULL;
2763}
2764
2765#undef SPLIT_APPEND
2766
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002767
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002768static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002769string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002770 /* Counterparts of the obsolete stropmodule functions; except
2771 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002772 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2773 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2774 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2775 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002776 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2777 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2778 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2779 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2780 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2781 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2782 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002783 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
2784 capitalize__doc__},
2785 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2786 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2787 endswith__doc__},
2788 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2789 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2790 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2791 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2792 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2793 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2794 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2795 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2796 startswith__doc__},
2797 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2798 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
2799 swapcase__doc__},
2800 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2801 translate__doc__},
2802 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2803 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2804 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2805 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2806 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
2807 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2808 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2809 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
2810 expandtabs__doc__},
2811 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
2812 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002813 {NULL, NULL} /* sentinel */
2814};
2815
Guido van Rossumae960af2001-08-30 03:11:59 +00002816staticforward PyObject *
2817str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2818
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002819static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002820string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002821{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002822 PyObject *x = NULL;
2823 static char *kwlist[] = {"object", 0};
2824
Guido van Rossumae960af2001-08-30 03:11:59 +00002825 if (type != &PyString_Type)
2826 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002827 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2828 return NULL;
2829 if (x == NULL)
2830 return PyString_FromString("");
2831 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002832}
2833
Guido van Rossumae960af2001-08-30 03:11:59 +00002834static PyObject *
2835str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2836{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002837 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002838 int n;
2839
2840 assert(PyType_IsSubtype(type, &PyString_Type));
2841 tmp = string_new(&PyString_Type, args, kwds);
2842 if (tmp == NULL)
2843 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002844 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002845 n = PyString_GET_SIZE(tmp);
2846 pnew = type->tp_alloc(type, n);
2847 if (pnew != NULL) {
2848 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002849 ((PyStringObject *)pnew)->ob_shash =
2850 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002851 ((PyStringObject *)pnew)->ob_sinterned =
2852 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002853 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002854 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002855 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002856}
2857
Guido van Rossumcacfc072002-05-24 19:01:59 +00002858static PyObject *
2859basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2860{
2861 PyErr_SetString(PyExc_TypeError,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002862 "The basestring type cannot be instantiated");
Guido van Rossumcacfc072002-05-24 19:01:59 +00002863 return NULL;
2864}
2865
2866static char basestring_doc[] =
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002867"Type basestring cannot be instantiated; it is the base for str and unicode.";
Guido van Rossumcacfc072002-05-24 19:01:59 +00002868
2869PyTypeObject PyBaseString_Type = {
2870 PyObject_HEAD_INIT(&PyType_Type)
2871 0,
Neal Norwitz32a7e7f2002-05-31 19:58:02 +00002872 "basestring",
Guido van Rossumcacfc072002-05-24 19:01:59 +00002873 0,
2874 0,
2875 0, /* tp_dealloc */
2876 0, /* tp_print */
2877 0, /* tp_getattr */
2878 0, /* tp_setattr */
2879 0, /* tp_compare */
2880 0, /* tp_repr */
2881 0, /* tp_as_number */
2882 0, /* tp_as_sequence */
2883 0, /* tp_as_mapping */
2884 0, /* tp_hash */
2885 0, /* tp_call */
2886 0, /* tp_str */
2887 0, /* tp_getattro */
2888 0, /* tp_setattro */
2889 0, /* tp_as_buffer */
2890 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
2891 basestring_doc, /* tp_doc */
2892 0, /* tp_traverse */
2893 0, /* tp_clear */
2894 0, /* tp_richcompare */
2895 0, /* tp_weaklistoffset */
2896 0, /* tp_iter */
2897 0, /* tp_iternext */
2898 0, /* tp_methods */
2899 0, /* tp_members */
2900 0, /* tp_getset */
2901 &PyBaseObject_Type, /* tp_base */
2902 0, /* tp_dict */
2903 0, /* tp_descr_get */
2904 0, /* tp_descr_set */
2905 0, /* tp_dictoffset */
2906 0, /* tp_init */
2907 0, /* tp_alloc */
2908 basestring_new, /* tp_new */
2909 0, /* tp_free */
2910};
2911
Tim Peters6d6c1a32001-08-02 04:15:00 +00002912static char string_doc[] =
2913"str(object) -> string\n\
2914\n\
2915Return a nice string representation of the object.\n\
2916If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002917
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002918PyTypeObject PyString_Type = {
2919 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002920 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002921 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002922 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002923 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002924 (destructor)string_dealloc, /* tp_dealloc */
2925 (printfunc)string_print, /* tp_print */
2926 0, /* tp_getattr */
2927 0, /* tp_setattr */
2928 0, /* tp_compare */
2929 (reprfunc)string_repr, /* tp_repr */
2930 0, /* tp_as_number */
2931 &string_as_sequence, /* tp_as_sequence */
2932 0, /* tp_as_mapping */
2933 (hashfunc)string_hash, /* tp_hash */
2934 0, /* tp_call */
2935 (reprfunc)string_str, /* tp_str */
2936 PyObject_GenericGetAttr, /* tp_getattro */
2937 0, /* tp_setattro */
2938 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002939 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002940 string_doc, /* tp_doc */
2941 0, /* tp_traverse */
2942 0, /* tp_clear */
2943 (richcmpfunc)string_richcompare, /* tp_richcompare */
2944 0, /* tp_weaklistoffset */
2945 0, /* tp_iter */
2946 0, /* tp_iternext */
2947 string_methods, /* tp_methods */
2948 0, /* tp_members */
2949 0, /* tp_getset */
Guido van Rossumcacfc072002-05-24 19:01:59 +00002950 &PyBaseString_Type, /* tp_base */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002951 0, /* tp_dict */
2952 0, /* tp_descr_get */
2953 0, /* tp_descr_set */
2954 0, /* tp_dictoffset */
2955 0, /* tp_init */
2956 0, /* tp_alloc */
2957 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00002958 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002959};
2960
2961void
Fred Drakeba096332000-07-09 07:04:36 +00002962PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002963{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002964 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002965 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002966 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002967 if (w == NULL || !PyString_Check(*pv)) {
2968 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002969 *pv = NULL;
2970 return;
2971 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002972 v = string_concat((PyStringObject *) *pv, w);
2973 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002974 *pv = v;
2975}
2976
Guido van Rossum013142a1994-08-30 08:19:36 +00002977void
Fred Drakeba096332000-07-09 07:04:36 +00002978PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002979{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002980 PyString_Concat(pv, w);
2981 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002982}
2983
2984
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002985/* The following function breaks the notion that strings are immutable:
2986 it changes the size of a string. We get away with this only if there
2987 is only one module referencing the object. You can also think of it
2988 as creating a new string object and destroying the old one, only
2989 more efficiently. In any case, don't use this if the string may
Tim Peters5de98422002-04-27 18:44:32 +00002990 already be known to some other part of the code...
2991 Note that if there's not enough memory to resize the string, the original
2992 string object at *pv is deallocated, *pv is set to NULL, an "out of
2993 memory" exception is set, and -1 is returned. Else (on success) 0 is
2994 returned, and the value in *pv may or may not be the same as on input.
2995 As always, an extra byte is allocated for a trailing \0 byte (newsize
2996 does *not* include that), and a trailing \0 byte is stored.
2997*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002998
2999int
Fred Drakeba096332000-07-09 07:04:36 +00003000_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003001{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003002 register PyObject *v;
3003 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003004 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003005 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003006 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003007 Py_DECREF(v);
3008 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003009 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003010 }
Guido van Rossum921842f1990-11-18 17:30:23 +00003011 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00003012#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00003013 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00003014#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003015 _Py_ForgetReference(v);
3016 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00003017 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003018 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003019 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00003020 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003021 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00003022 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003023 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003024 _Py_NewReference(*pv);
3025 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00003026 sv->ob_size = newsize;
3027 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00003028 return 0;
3029}
Guido van Rossume5372401993-03-16 12:15:04 +00003030
3031/* Helpers for formatstring */
3032
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003033static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003034getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00003035{
3036 int argidx = *p_argidx;
3037 if (argidx < arglen) {
3038 (*p_argidx)++;
3039 if (arglen < 0)
3040 return args;
3041 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003042 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00003043 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003044 PyErr_SetString(PyExc_TypeError,
3045 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00003046 return NULL;
3047}
3048
/* Format codes: one bit per flag character that may follow the '%'
 * of a conversion specification.
 */
#define F_LJUST 0x01    /* '-' */
#define F_SIGN  0x02    /* '+' */
#define F_BLANK 0x04    /* ' ' */
#define F_ALT   0x08    /* '#' */
#define F_ZERO  0x10    /* '0' */
3061
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003062static int
Fred Drakeba096332000-07-09 07:04:36 +00003063formatfloat(char *buf, size_t buflen, int flags,
3064 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003065{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003066 /* fmt = '%#.' + `prec` + `type`
3067 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003068 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003069 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003070 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003071 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003072 if (prec < 0)
3073 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003074 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3075 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003076 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3077 (flags&F_ALT) ? "#" : "",
3078 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003079 /* worst case length calc to ensure no buffer overrun:
3080 fmt = %#.<prec>g
3081 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003082 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003083 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3084 If prec=0 the effective precision is 1 (the leading digit is
3085 always given), therefore increase by one to 10+prec. */
3086 if (buflen <= (size_t)10 + (size_t)prec) {
3087 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003088 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003089 return -1;
3090 }
Tim Peters885d4572001-11-28 20:27:42 +00003091 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003092 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003093}
3094
Tim Peters38fd5b62000-09-21 05:43:11 +00003095/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3096 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3097 * Python's regular ints.
3098 * Return value: a new PyString*, or NULL if error.
3099 * . *pbuf is set to point into it,
3100 * *plen set to the # of chars following that.
3101 * Caller must decref it when done using pbuf.
3102 * The string starting at *pbuf is of the form
3103 * "-"? ("0x" | "0X")? digit+
3104 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003105 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003106 * There will be at least prec digits, zero-filled on the left if
3107 * necessary to get that many.
3108 * val object to be converted
3109 * flags bitmask of format flags; only F_ALT is looked at
3110 * prec minimum number of digits; 0-fill on left if needed
3111 * type a character in [duoxX]; u acts the same as d
3112 *
3113 * CAUTION: o, x and X conversions on regular ints can never
3114 * produce a '-' sign, but can for Python's unbounded ints.
3115 */
3116PyObject*
3117_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3118 char **pbuf, int *plen)
3119{
3120 PyObject *result = NULL;
3121 char *buf;
3122 int i;
3123 int sign; /* 1 if '-', else 0 */
3124 int len; /* number of characters */
3125 int numdigits; /* len == numnondigits + numdigits */
3126 int numnondigits = 0;
3127
3128 switch (type) {
3129 case 'd':
3130 case 'u':
3131 result = val->ob_type->tp_str(val);
3132 break;
3133 case 'o':
3134 result = val->ob_type->tp_as_number->nb_oct(val);
3135 break;
3136 case 'x':
3137 case 'X':
3138 numnondigits = 2;
3139 result = val->ob_type->tp_as_number->nb_hex(val);
3140 break;
3141 default:
3142 assert(!"'type' not in [duoxX]");
3143 }
3144 if (!result)
3145 return NULL;
3146
3147 /* To modify the string in-place, there can only be one reference. */
3148 if (result->ob_refcnt != 1) {
3149 PyErr_BadInternalCall();
3150 return NULL;
3151 }
3152 buf = PyString_AsString(result);
3153 len = PyString_Size(result);
3154 if (buf[len-1] == 'L') {
3155 --len;
3156 buf[len] = '\0';
3157 }
3158 sign = buf[0] == '-';
3159 numnondigits += sign;
3160 numdigits = len - numnondigits;
3161 assert(numdigits > 0);
3162
Tim Petersfff53252001-04-12 18:38:48 +00003163 /* Get rid of base marker unless F_ALT */
3164 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003165 /* Need to skip 0x, 0X or 0. */
3166 int skipped = 0;
3167 switch (type) {
3168 case 'o':
3169 assert(buf[sign] == '0');
3170 /* If 0 is only digit, leave it alone. */
3171 if (numdigits > 1) {
3172 skipped = 1;
3173 --numdigits;
3174 }
3175 break;
3176 case 'x':
3177 case 'X':
3178 assert(buf[sign] == '0');
3179 assert(buf[sign + 1] == 'x');
3180 skipped = 2;
3181 numnondigits -= 2;
3182 break;
3183 }
3184 if (skipped) {
3185 buf += skipped;
3186 len -= skipped;
3187 if (sign)
3188 buf[0] = '-';
3189 }
3190 assert(len == numnondigits + numdigits);
3191 assert(numdigits > 0);
3192 }
3193
3194 /* Fill with leading zeroes to meet minimum width. */
3195 if (prec > numdigits) {
3196 PyObject *r1 = PyString_FromStringAndSize(NULL,
3197 numnondigits + prec);
3198 char *b1;
3199 if (!r1) {
3200 Py_DECREF(result);
3201 return NULL;
3202 }
3203 b1 = PyString_AS_STRING(r1);
3204 for (i = 0; i < numnondigits; ++i)
3205 *b1++ = *buf++;
3206 for (i = 0; i < prec - numdigits; i++)
3207 *b1++ = '0';
3208 for (i = 0; i < numdigits; i++)
3209 *b1++ = *buf++;
3210 *b1 = '\0';
3211 Py_DECREF(result);
3212 result = r1;
3213 buf = PyString_AS_STRING(result);
3214 len = numnondigits + prec;
3215 }
3216
3217 /* Fix up case for hex conversions. */
3218 switch (type) {
3219 case 'x':
3220 /* Need to convert all upper case letters to lower case. */
3221 for (i = 0; i < len; i++)
3222 if (buf[i] >= 'A' && buf[i] <= 'F')
3223 buf[i] += 'a'-'A';
3224 break;
3225 case 'X':
3226 /* Need to convert 0x to 0X (and -0x to -0X). */
3227 if (buf[sign + 1] == 'x')
3228 buf[sign + 1] = 'X';
3229 break;
3230 }
3231 *pbuf = buf;
3232 *plen = len;
3233 return result;
3234}
3235
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003236static int
Fred Drakeba096332000-07-09 07:04:36 +00003237formatint(char *buf, size_t buflen, int flags,
3238 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003239{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003240 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003241 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3242 + 1 + 1 = 24 */
3243 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003244 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003245
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003246 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003247 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003248 if (prec < 0)
3249 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003250
3251 if ((flags & F_ALT) &&
3252 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003253 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003254 * of issues that cause pain:
3255 * - when 0 is being converted, the C standard leaves off
3256 * the '0x' or '0X', which is inconsistent with other
3257 * %#x/%#X conversions and inconsistent with Python's
3258 * hex() function
3259 * - there are platforms that violate the standard and
3260 * convert 0 with the '0x' or '0X'
3261 * (Metrowerks, Compaq Tru64)
3262 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003263 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003264 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003265 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003266 * We can achieve the desired consistency by inserting our
3267 * own '0x' or '0X' prefix, and substituting %x/%X in place
3268 * of %#x/%#X.
3269 *
3270 * Note that this is the same approach as used in
3271 * formatint() in unicodeobject.c
3272 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003273 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003274 type, prec, type);
3275 }
3276 else {
3277 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003278 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003279 prec, type);
3280 }
3281
Tim Peters38fd5b62000-09-21 05:43:11 +00003282 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003283 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3284 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003285 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003286 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003287 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003288 return -1;
3289 }
Tim Peters885d4572001-11-28 20:27:42 +00003290 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003291 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003292}
3293
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003294static int
Fred Drakeba096332000-07-09 07:04:36 +00003295formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003296{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003297 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003298 if (PyString_Check(v)) {
3299 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003300 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003301 }
3302 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003303 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003304 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003305 }
3306 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003307 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003308}
3309
Guido van Rossum013142a1994-08-30 08:19:36 +00003310
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003311/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3312
3313 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3314 chars are formatted. XXX This is a magic number. Each formatting
3315 routine does bounds checking to ensure no overflow, but a better
3316 solution may be to malloc a buffer of appropriate size for each
3317 format. For now, the current solution is sufficient.
3318*/
3319#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003320
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003321PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003322PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003323{
3324 char *fmt, *res;
3325 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003326 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003327 PyObject *result, *orig_args;
3328#ifdef Py_USING_UNICODE
3329 PyObject *v, *w;
3330#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003331 PyObject *dict = NULL;
3332 if (format == NULL || !PyString_Check(format) || args == NULL) {
3333 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003334 return NULL;
3335 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003336 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003337 fmt = PyString_AS_STRING(format);
3338 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003339 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003340 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003341 if (result == NULL)
3342 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003343 res = PyString_AsString(result);
3344 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003345 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003346 argidx = 0;
3347 }
3348 else {
3349 arglen = -1;
3350 argidx = -2;
3351 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003352 if (args->ob_type->tp_as_mapping)
3353 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003354 while (--fmtcnt >= 0) {
3355 if (*fmt != '%') {
3356 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003357 rescnt = fmtcnt + 100;
3358 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003359 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003360 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003361 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003362 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003363 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003364 }
3365 *res++ = *fmt++;
3366 }
3367 else {
3368 /* Got a format specifier */
3369 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003370 int width = -1;
3371 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003372 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003373 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003374 PyObject *v = NULL;
3375 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003376 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003377 int sign;
3378 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003379 char formatbuf[FORMATBUFLEN];
3380 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003381#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003382 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003383 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003384#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003385
Guido van Rossumda9c2711996-12-05 21:58:58 +00003386 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003387 if (*fmt == '(') {
3388 char *keystart;
3389 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003390 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003391 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003392
3393 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003394 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003395 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003396 goto error;
3397 }
3398 ++fmt;
3399 --fmtcnt;
3400 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003401 /* Skip over balanced parentheses */
3402 while (pcount > 0 && --fmtcnt >= 0) {
3403 if (*fmt == ')')
3404 --pcount;
3405 else if (*fmt == '(')
3406 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003407 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003408 }
3409 keylen = fmt - keystart - 1;
3410 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003411 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003412 "incomplete format key");
3413 goto error;
3414 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003415 key = PyString_FromStringAndSize(keystart,
3416 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003417 if (key == NULL)
3418 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003419 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003420 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003421 args_owned = 0;
3422 }
3423 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003424 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003425 if (args == NULL) {
3426 goto error;
3427 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003428 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003429 arglen = -1;
3430 argidx = -2;
3431 }
Guido van Rossume5372401993-03-16 12:15:04 +00003432 while (--fmtcnt >= 0) {
3433 switch (c = *fmt++) {
3434 case '-': flags |= F_LJUST; continue;
3435 case '+': flags |= F_SIGN; continue;
3436 case ' ': flags |= F_BLANK; continue;
3437 case '#': flags |= F_ALT; continue;
3438 case '0': flags |= F_ZERO; continue;
3439 }
3440 break;
3441 }
3442 if (c == '*') {
3443 v = getnextarg(args, arglen, &argidx);
3444 if (v == NULL)
3445 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003446 if (!PyInt_Check(v)) {
3447 PyErr_SetString(PyExc_TypeError,
3448 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003449 goto error;
3450 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003451 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003452 if (width < 0) {
3453 flags |= F_LJUST;
3454 width = -width;
3455 }
Guido van Rossume5372401993-03-16 12:15:04 +00003456 if (--fmtcnt >= 0)
3457 c = *fmt++;
3458 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003459 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003460 width = c - '0';
3461 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003462 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003463 if (!isdigit(c))
3464 break;
3465 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003466 PyErr_SetString(
3467 PyExc_ValueError,
3468 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003469 goto error;
3470 }
3471 width = width*10 + (c - '0');
3472 }
3473 }
3474 if (c == '.') {
3475 prec = 0;
3476 if (--fmtcnt >= 0)
3477 c = *fmt++;
3478 if (c == '*') {
3479 v = getnextarg(args, arglen, &argidx);
3480 if (v == NULL)
3481 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003482 if (!PyInt_Check(v)) {
3483 PyErr_SetString(
3484 PyExc_TypeError,
3485 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003486 goto error;
3487 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003488 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003489 if (prec < 0)
3490 prec = 0;
3491 if (--fmtcnt >= 0)
3492 c = *fmt++;
3493 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003494 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003495 prec = c - '0';
3496 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003497 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003498 if (!isdigit(c))
3499 break;
3500 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003501 PyErr_SetString(
3502 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003503 "prec too big");
3504 goto error;
3505 }
3506 prec = prec*10 + (c - '0');
3507 }
3508 }
3509 } /* prec */
3510 if (fmtcnt >= 0) {
3511 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003512 if (--fmtcnt >= 0)
3513 c = *fmt++;
3514 }
3515 }
3516 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003517 PyErr_SetString(PyExc_ValueError,
3518 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003519 goto error;
3520 }
3521 if (c != '%') {
3522 v = getnextarg(args, arglen, &argidx);
3523 if (v == NULL)
3524 goto error;
3525 }
3526 sign = 0;
3527 fill = ' ';
3528 switch (c) {
3529 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003530 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003531 len = 1;
3532 break;
3533 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003534 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003535#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003536 if (PyUnicode_Check(v)) {
3537 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003538 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003539 goto unicode;
3540 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003541#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003542 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003543 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003544 else
3545 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003546 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003547 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003548 if (!PyString_Check(temp)) {
3549 PyErr_SetString(PyExc_TypeError,
3550 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003551 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003552 goto error;
3553 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003554 pbuf = PyString_AS_STRING(temp);
3555 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003556 if (prec >= 0 && len > prec)
3557 len = prec;
3558 break;
3559 case 'i':
3560 case 'd':
3561 case 'u':
3562 case 'o':
3563 case 'x':
3564 case 'X':
3565 if (c == 'i')
3566 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003567 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003568 temp = _PyString_FormatLong(v, flags,
3569 prec, c, &pbuf, &len);
3570 if (!temp)
3571 goto error;
3572 /* unbounded ints can always produce
3573 a sign character! */
3574 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003575 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003576 else {
3577 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003578 len = formatint(pbuf,
3579 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003580 flags, prec, c, v);
3581 if (len < 0)
3582 goto error;
3583 /* only d conversion is signed */
3584 sign = c == 'd';
3585 }
3586 if (flags & F_ZERO)
3587 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003588 break;
3589 case 'e':
3590 case 'E':
3591 case 'f':
3592 case 'g':
3593 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003594 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003595 len = formatfloat(pbuf, sizeof(formatbuf),
3596 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003597 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003598 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003599 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003600 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003601 fill = '0';
3602 break;
3603 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003604 pbuf = formatbuf;
3605 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003606 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003607 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003608 break;
3609 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003610 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003611 "unsupported format character '%c' (0x%x) "
3612 "at index %i",
3613 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003614 goto error;
3615 }
3616 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003617 if (*pbuf == '-' || *pbuf == '+') {
3618 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003619 len--;
3620 }
3621 else if (flags & F_SIGN)
3622 sign = '+';
3623 else if (flags & F_BLANK)
3624 sign = ' ';
3625 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003626 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003627 }
3628 if (width < len)
3629 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003630 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003631 reslen -= rescnt;
3632 rescnt = width + fmtcnt + 100;
3633 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003634 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003635 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003636 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003637 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003638 }
3639 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003640 if (fill != ' ')
3641 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003642 rescnt--;
3643 if (width > len)
3644 width--;
3645 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003646 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3647 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003648 assert(pbuf[1] == c);
3649 if (fill != ' ') {
3650 *res++ = *pbuf++;
3651 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003652 }
Tim Petersfff53252001-04-12 18:38:48 +00003653 rescnt -= 2;
3654 width -= 2;
3655 if (width < 0)
3656 width = 0;
3657 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003658 }
3659 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003660 do {
3661 --rescnt;
3662 *res++ = fill;
3663 } while (--width > len);
3664 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003665 if (fill == ' ') {
3666 if (sign)
3667 *res++ = sign;
3668 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003669 (c == 'x' || c == 'X')) {
3670 assert(pbuf[0] == '0');
3671 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003672 *res++ = *pbuf++;
3673 *res++ = *pbuf++;
3674 }
3675 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003676 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003677 res += len;
3678 rescnt -= len;
3679 while (--width >= len) {
3680 --rescnt;
3681 *res++ = ' ';
3682 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003683 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003684 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003685 "not all arguments converted during string formatting");
Guido van Rossum013142a1994-08-30 08:19:36 +00003686 goto error;
3687 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003688 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003689 } /* '%' */
3690 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003691 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003692 PyErr_SetString(PyExc_TypeError,
Raymond Hettinger0ebac972002-05-21 15:14:57 +00003693 "not all arguments converted during string formatting");
Guido van Rossume5372401993-03-16 12:15:04 +00003694 goto error;
3695 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003696 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003697 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003698 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003699 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003700 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003701
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003702#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003703 unicode:
3704 if (args_owned) {
3705 Py_DECREF(args);
3706 args_owned = 0;
3707 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003708 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003709 if (PyTuple_Check(orig_args) && argidx > 0) {
3710 PyObject *v;
3711 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3712 v = PyTuple_New(n);
3713 if (v == NULL)
3714 goto error;
3715 while (--n >= 0) {
3716 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3717 Py_INCREF(w);
3718 PyTuple_SET_ITEM(v, n, w);
3719 }
3720 args = v;
3721 } else {
3722 Py_INCREF(orig_args);
3723 args = orig_args;
3724 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003725 args_owned = 1;
3726 /* Take what we have of the result and let the Unicode formatting
3727 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003728 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003729 if (_PyString_Resize(&result, rescnt))
3730 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003731 fmtcnt = PyString_GET_SIZE(format) - \
3732 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003733 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3734 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003735 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003736 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003737 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003738 if (v == NULL)
3739 goto error;
3740 /* Paste what we have (result) to what the Unicode formatting
3741 function returned (v) and return the result (or error) */
3742 w = PyUnicode_Concat(result, v);
3743 Py_DECREF(result);
3744 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003745 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003746 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003747#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003748
Guido van Rossume5372401993-03-16 12:15:04 +00003749 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003750 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003751 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003752 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003753 }
Guido van Rossume5372401993-03-16 12:15:04 +00003754 return NULL;
3755}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003756
3757
Guido van Rossum2a61e741997-01-18 07:55:05 +00003758
/* Table of interned strings: a dict, created on first use by
 * PyString_InternInPlace(), in which each interned string is stored as
 * both key and value.
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *     Change the Fini function to only remove otherwise unreferenced
 *     strings from the interned table.  There are references in
 *     hard-to-find static variables all over the interpreter, and it's not
 *     worth trying to get rid of all those; but "uninterning" isn't fair
 *     either and may cause subtle failures later -- so we have to keep them
 *     in the interned table.
 */
static PyObject *interned;
3772
3773void
Fred Drakeba096332000-07-09 07:04:36 +00003774PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003775{
3776 register PyStringObject *s = (PyStringObject *)(*p);
3777 PyObject *t;
3778 if (s == NULL || !PyString_Check(s))
3779 Py_FatalError("PyString_InternInPlace: strings only please!");
3780 if ((t = s->ob_sinterned) != NULL) {
3781 if (t == (PyObject *)s)
3782 return;
3783 Py_INCREF(t);
3784 *p = t;
3785 Py_DECREF(s);
3786 return;
3787 }
3788 if (interned == NULL) {
3789 interned = PyDict_New();
3790 if (interned == NULL)
3791 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003792 }
3793 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3794 Py_INCREF(t);
3795 *p = s->ob_sinterned = t;
3796 Py_DECREF(s);
3797 return;
3798 }
Tim Peters111f6092001-09-12 07:54:51 +00003799 /* Ensure that only true string objects appear in the intern dict,
3800 and as the value of ob_sinterned. */
3801 if (PyString_CheckExact(s)) {
3802 t = (PyObject *)s;
3803 if (PyDict_SetItem(interned, t, t) == 0) {
3804 s->ob_sinterned = t;
3805 return;
3806 }
3807 }
3808 else {
3809 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3810 PyString_GET_SIZE(s));
3811 if (t != NULL) {
3812 if (PyDict_SetItem(interned, t, t) == 0) {
3813 *p = s->ob_sinterned = t;
3814 Py_DECREF(s);
3815 return;
3816 }
3817 Py_DECREF(t);
3818 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003819 }
3820 PyErr_Clear();
3821}
3822
3823
3824PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003825PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003826{
3827 PyObject *s = PyString_FromString(cp);
3828 if (s == NULL)
3829 return NULL;
3830 PyString_InternInPlace(&s);
3831 return s;
3832}
3833
Guido van Rossum8cf04761997-08-02 02:57:45 +00003834void
Fred Drakeba096332000-07-09 07:04:36 +00003835PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003836{
3837 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003838 for (i = 0; i < UCHAR_MAX + 1; i++) {
3839 Py_XDECREF(characters[i]);
3840 characters[i] = NULL;
3841 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003842 Py_XDECREF(nullstring);
3843 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003844 if (interned) {
3845 int pos, changed;
3846 PyObject *key, *value;
3847 do {
3848 changed = 0;
3849 pos = 0;
3850 while (PyDict_Next(interned, &pos, &key, &value)) {
3851 if (key->ob_refcnt == 2 && key == value) {
3852 PyDict_DelItem(interned, key);
3853 changed = 1;
3854 }
3855 }
3856 } while (changed);
3857 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003858}
Barry Warsawa903ad982001-02-23 16:40:48 +00003859
Barry Warsawa903ad982001-02-23 16:40:48 +00003860void _Py_ReleaseInternedStrings(void)
3861{
3862 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003863 fprintf(stderr, "releasing interned strings\n");
3864 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003865 Py_DECREF(interned);
3866 interned = NULL;
3867 }
3868}