blob: 6a0eece665e1782f38192eed38a743d69b2b1d1f [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
18/*
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000019 For both PyString_FromString() and PyString_FromStringAndSize(), the
20 parameter `size' denotes number of characters to allocate, not counting any
Martin v. Löwis1f803f72002-01-16 10:53:24 +000021 null terminating character.
Martin v. Löwisd1327502001-12-02 18:09:41 +000022
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000023 For PyString_FromString(), the parameter `str' points to a null-terminated
Martin v. Löwis1f803f72002-01-16 10:53:24 +000024 string containing exactly `size' bytes.
Martin v. Löwisd1327502001-12-02 18:09:41 +000025
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000026 For PyString_FromStringAndSize(), the parameter the parameter `str' is
27 either NULL or else points to a string containing at least `size' bytes.
28 For PyString_FromStringAndSize(), the string in the `str' parameter does
29 not have to be null-terminated. (Therefore it is safe to construct a
30 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
31 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
32 bytes (setting the last byte to the null terminating character) and you can
33 fill in the data yourself. If `str' is non-NULL then the resulting
34 PyString object must be treated as immutable and you must not fill in nor
35 alter the data yourself, since the strings may be shared.
Martin v. Löwis8f1ea712001-12-03 08:24:52 +000036
Guido van Rossum3aa3fc42002-04-15 13:48:52 +000037 The PyObject member `op->ob_size', which denotes the number of "extra
38 items" in a variable-size object, will contain the number of bytes
39 allocated for string data, not counting the null terminating character. It
40 is therefore equal to the equal to the `size' parameter (for
41 PyString_FromStringAndSize()) or the length of the string in the `str'
42 parameter (for PyString_FromString()).
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000043*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
159 count = vargs;
160#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000161 /* step 1: figure out how large a buffer we need */
162 for (f = format; *f; f++) {
163 if (*f == '%') {
164 const char* p = f;
165 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
166 ;
167
168 /* skip the 'l' in %ld, since it doesn't change the
169 width. although only %d is supported (see
170 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000171 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000172 if (*f == 'l' && *(f+1) == 'd')
173 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000174
Barry Warsawdadace02001-08-24 18:32:06 +0000175 switch (*f) {
176 case 'c':
177 (void)va_arg(count, int);
178 /* fall through... */
179 case '%':
180 n++;
181 break;
182 case 'd': case 'i': case 'x':
183 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000184 /* 20 bytes is enough to hold a 64-bit
185 integer. Decimal takes the most space.
186 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 n += 20;
188 break;
189 case 's':
190 s = va_arg(count, char*);
191 n += strlen(s);
192 break;
193 case 'p':
194 (void) va_arg(count, int);
195 /* maximum 64-bit pointer representation:
196 * 0xffffffffffffffff
197 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000198 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000199 */
200 n += 19;
201 break;
202 default:
203 /* if we stumble upon an unknown
204 formatting code, copy the rest of
205 the format string to the output
206 string. (we cannot just skip the
207 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000208 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000209 n += strlen(p);
210 goto expand;
211 }
212 } else
213 n++;
214 }
215 expand:
216 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000217 /* Since we've analyzed how much space we need for the worst case,
218 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 string = PyString_FromStringAndSize(NULL, n);
220 if (!string)
221 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000222
Barry Warsawdadace02001-08-24 18:32:06 +0000223 s = PyString_AsString(string);
224
225 for (f = format; *f; f++) {
226 if (*f == '%') {
227 const char* p = f++;
228 int i, longflag = 0;
229 /* parse the width.precision part (we're only
230 interested in the precision value, if any) */
231 n = 0;
232 while (isdigit(Py_CHARMASK(*f)))
233 n = (n*10) + *f++ - '0';
234 if (*f == '.') {
235 f++;
236 n = 0;
237 while (isdigit(Py_CHARMASK(*f)))
238 n = (n*10) + *f++ - '0';
239 }
240 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
241 f++;
242 /* handle the long flag, but only for %ld. others
243 can be added when necessary. */
244 if (*f == 'l' && *(f+1) == 'd') {
245 longflag = 1;
246 ++f;
247 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000248
Barry Warsawdadace02001-08-24 18:32:06 +0000249 switch (*f) {
250 case 'c':
251 *s++ = va_arg(vargs, int);
252 break;
253 case 'd':
254 if (longflag)
255 sprintf(s, "%ld", va_arg(vargs, long));
256 else
257 sprintf(s, "%d", va_arg(vargs, int));
258 s += strlen(s);
259 break;
260 case 'i':
261 sprintf(s, "%i", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'x':
265 sprintf(s, "%x", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 's':
269 p = va_arg(vargs, char*);
270 i = strlen(p);
271 if (n > 0 && i > n)
272 i = n;
273 memcpy(s, p, i);
274 s += i;
275 break;
276 case 'p':
277 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000278 /* %p is ill-defined: ensure leading 0x. */
279 if (s[1] == 'X')
280 s[1] = 'x';
281 else if (s[1] != 'x') {
282 memmove(s+2, s, strlen(s)+1);
283 s[0] = '0';
284 s[1] = 'x';
285 }
Barry Warsawdadace02001-08-24 18:32:06 +0000286 s += strlen(s);
287 break;
288 case '%':
289 *s++ = '%';
290 break;
291 default:
292 strcpy(s, p);
293 s += strlen(s);
294 goto end;
295 }
296 } else
297 *s++ = *f;
298 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000299
Barry Warsawdadace02001-08-24 18:32:06 +0000300 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000301 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000302 return string;
303}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000304
Barry Warsawdadace02001-08-24 18:32:06 +0000305PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000306PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000307{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000308 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000309 va_list vargs;
310
311#ifdef HAVE_STDARG_PROTOTYPES
312 va_start(vargs, format);
313#else
314 va_start(vargs);
315#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 ret = PyString_FromFormatV(format, vargs);
317 va_end(vargs);
318 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319}
320
321
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000322PyObject *PyString_Decode(const char *s,
323 int size,
324 const char *encoding,
325 const char *errors)
326{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000327 PyObject *v, *str;
328
329 str = PyString_FromStringAndSize(s, size);
330 if (str == NULL)
331 return NULL;
332 v = PyString_AsDecodedString(str, encoding, errors);
333 Py_DECREF(str);
334 return v;
335}
336
337PyObject *PyString_AsDecodedObject(PyObject *str,
338 const char *encoding,
339 const char *errors)
340{
341 PyObject *v;
342
343 if (!PyString_Check(str)) {
344 PyErr_BadArgument();
345 goto onError;
346 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000347
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000348 if (encoding == NULL) {
349#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000351#else
352 PyErr_SetString(PyExc_ValueError, "no encoding specified");
353 goto onError;
354#endif
355 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356
357 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000358 v = PyCodec_Decode(str, encoding, errors);
359 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361
362 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000363
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 return NULL;
366}
367
368PyObject *PyString_AsDecodedString(PyObject *str,
369 const char *encoding,
370 const char *errors)
371{
372 PyObject *v;
373
374 v = PyString_AsDecodedObject(str, encoding, errors);
375 if (v == NULL)
376 goto onError;
377
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000378#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000379 /* Convert Unicode to a string using the default encoding */
380 if (PyUnicode_Check(v)) {
381 PyObject *temp = v;
382 v = PyUnicode_AsEncodedString(v, NULL, NULL);
383 Py_DECREF(temp);
384 if (v == NULL)
385 goto onError;
386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 if (!PyString_Check(v)) {
389 PyErr_Format(PyExc_TypeError,
390 "decoder did not return a string object (type=%.400s)",
391 v->ob_type->tp_name);
392 Py_DECREF(v);
393 goto onError;
394 }
395
396 return v;
397
398 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 return NULL;
400}
401
402PyObject *PyString_Encode(const char *s,
403 int size,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 str = PyString_FromStringAndSize(s, size);
410 if (str == NULL)
411 return NULL;
412 v = PyString_AsEncodedString(str, encoding, errors);
413 Py_DECREF(str);
414 return v;
415}
416
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 const char *encoding,
419 const char *errors)
420{
421 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000422
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000423 if (!PyString_Check(str)) {
424 PyErr_BadArgument();
425 goto onError;
426 }
427
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000428 if (encoding == NULL) {
429#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000431#else
432 PyErr_SetString(PyExc_ValueError, "no encoding specified");
433 goto onError;
434#endif
435 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436
437 /* Encode via the codec registry */
438 v = PyCodec_Encode(str, encoding, errors);
439 if (v == NULL)
440 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000441
442 return v;
443
444 onError:
445 return NULL;
446}
447
448PyObject *PyString_AsEncodedString(PyObject *str,
449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v;
453
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000454 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000455 if (v == NULL)
456 goto onError;
457
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000458#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 /* Convert Unicode to a string using the default encoding */
460 if (PyUnicode_Check(v)) {
461 PyObject *temp = v;
462 v = PyUnicode_AsEncodedString(v, NULL, NULL);
463 Py_DECREF(temp);
464 if (v == NULL)
465 goto onError;
466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(v)) {
469 PyErr_Format(PyExc_TypeError,
470 "encoder did not return a string object (type=%.400s)",
471 v->ob_type->tp_name);
472 Py_DECREF(v);
473 goto onError;
474 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000476 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000477
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 onError:
479 return NULL;
480}
481
Guido van Rossum234f9421993-06-17 12:35:49 +0000482static void
Fred Drakeba096332000-07-09 07:04:36 +0000483string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000484{
Guido van Rossum9475a232001-10-05 20:51:39 +0000485 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000486}
487
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000488static int
489string_getsize(register PyObject *op)
490{
491 char *s;
492 int len;
493 if (PyString_AsStringAndSize(op, &s, &len))
494 return -1;
495 return len;
496}
497
498static /*const*/ char *
499string_getbuffer(register PyObject *op)
500{
501 char *s;
502 int len;
503 if (PyString_AsStringAndSize(op, &s, &len))
504 return NULL;
505 return s;
506}
507
Guido van Rossumd7047b31995-01-02 19:07:15 +0000508int
Fred Drakeba096332000-07-09 07:04:36 +0000509PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000511 if (!PyString_Check(op))
512 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
516/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000517PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (!PyString_Check(op))
520 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522}
523
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000524int
525PyString_AsStringAndSize(register PyObject *obj,
526 register char **s,
527 register int *len)
528{
529 if (s == NULL) {
530 PyErr_BadInternalCall();
531 return -1;
532 }
533
534 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000536 if (PyUnicode_Check(obj)) {
537 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
538 if (obj == NULL)
539 return -1;
540 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000541 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000542#endif
543 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000544 PyErr_Format(PyExc_TypeError,
545 "expected string or Unicode object, "
546 "%.200s found", obj->ob_type->tp_name);
547 return -1;
548 }
549 }
550
551 *s = PyString_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyString_GET_SIZE(obj);
554 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected string without null bytes");
557 return -1;
558 }
559 return 0;
560}
561
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562/* Methods */
563
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564static int
Fred Drakeba096332000-07-09 07:04:36 +0000565string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566{
567 int i;
568 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000570
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000571 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000572 if (! PyString_CheckExact(op)) {
573 int ret;
574 /* A str subclass may have its own __str__ method. */
575 op = (PyStringObject *) PyObject_Str((PyObject *)op);
576 if (op == NULL)
577 return -1;
578 ret = string_print(op, fp, flags);
579 Py_DECREF(op);
580 return ret;
581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000582 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000584 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000586
Thomas Wouters7e474022000-07-16 12:04:32 +0000587 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000589 if (memchr(op->ob_sval, '\'', op->ob_size) &&
590 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 quote = '"';
592
593 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594 for (i = 0; i < op->ob_size; i++) {
595 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000596 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000598 else if (c == '\t')
599 fprintf(fp, "\\t");
600 else if (c == '\n')
601 fprintf(fp, "\\n");
602 else if (c == '\r')
603 fprintf(fp, "\\r");
604 else if (c < ' ' || c >= 0x7f)
605 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000606 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000607 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000609 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000610 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000611}
612
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000613static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000614string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000616 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
617 PyObject *v;
618 if (newsize > INT_MAX) {
619 PyErr_SetString(PyExc_OverflowError,
620 "string is too large to make repr");
621 }
622 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000624 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000625 }
626 else {
627 register int i;
628 register char c;
629 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000630 int quote;
631
Thomas Wouters7e474022000-07-16 12:04:32 +0000632 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000633 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000634 if (memchr(op->ob_sval, '\'', op->ob_size) &&
635 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000636 quote = '"';
637
Tim Peters9161c8b2001-12-03 01:55:38 +0000638 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000639 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000641 /* There's at least enough room for a hex escape
642 and a closing quote. */
643 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000645 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000647 else if (c == '\t')
648 *p++ = '\\', *p++ = 't';
649 else if (c == '\n')
650 *p++ = '\\', *p++ = 'n';
651 else if (c == '\r')
652 *p++ = '\\', *p++ = 'r';
653 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000654 /* For performance, we don't want to call
655 PyOS_snprintf here (extra layers of
656 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000657 sprintf(p, "\\x%02x", c & 0xff);
658 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000659 }
660 else
661 *p++ = c;
662 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000663 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000664 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000666 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000667 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000668 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670}
671
Guido van Rossum189f1df2001-05-01 16:51:53 +0000672static PyObject *
673string_str(PyObject *s)
674{
Tim Petersc9933152001-10-16 20:18:24 +0000675 assert(PyString_Check(s));
676 if (PyString_CheckExact(s)) {
677 Py_INCREF(s);
678 return s;
679 }
680 else {
681 /* Subtype -- return genuine string with the same value. */
682 PyStringObject *t = (PyStringObject *) s;
683 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
684 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000685}
686
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687static int
Fred Drakeba096332000-07-09 07:04:36 +0000688string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
690 return a->ob_size;
691}
692
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000694string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
696 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697 register PyStringObject *op;
698 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000699#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (PyUnicode_Check(bb))
701 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000702#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000703 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000704 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000705 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706 return NULL;
707 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000708#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000710 if ((a->ob_size == 0 || b->ob_size == 0) &&
711 PyString_CheckExact(a) && PyString_CheckExact(b)) {
712 if (a->ob_size == 0) {
713 Py_INCREF(bb);
714 return bb;
715 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716 Py_INCREF(a);
717 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718 }
719 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000720 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000722 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000723 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000724 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000725 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000726 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000727 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000728 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
729 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
730 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000731 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732#undef b
733}
734
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000736string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737{
738 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000739 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000740 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000741 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742 if (n < 0)
743 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000744 /* watch out for overflows: the size can overflow int,
745 * and the # of bytes needed can overflow size_t
746 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000748 if (n && size / n != a->ob_size) {
749 PyErr_SetString(PyExc_OverflowError,
750 "repeated string is too long");
751 return NULL;
752 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000753 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000754 Py_INCREF(a);
755 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756 }
Tim Peters8f422462000-09-09 06:13:41 +0000757 nbytes = size * sizeof(char);
758 if (nbytes / sizeof(char) != (size_t)size ||
759 nbytes + sizeof(PyStringObject) <= nbytes) {
760 PyErr_SetString(PyExc_OverflowError,
761 "repeated string is too long");
762 return NULL;
763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000764 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000765 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000766 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000768 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000769 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000770 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000771 for (i = 0; i < size; i += a->ob_size)
772 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
773 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000774 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000775}
776
777/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
778
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000779static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000780string_slice(register PyStringObject *a, register int i, register int j)
781 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782{
783 if (i < 0)
784 i = 0;
785 if (j < 0)
786 j = 0; /* Avoid signed/unsigned bug in next line */
787 if (j > a->ob_size)
788 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000789 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
790 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000791 Py_INCREF(a);
792 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
794 if (j < i)
795 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000796 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797}
798
Guido van Rossum9284a572000-03-07 15:53:43 +0000799static int
Fred Drakeba096332000-07-09 07:04:36 +0000800string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000801{
802 register char *s, *end;
803 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000804#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000805 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000806 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000807#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000808 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000809 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000810 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000811 return -1;
812 }
813 c = PyString_AsString(el)[0];
814 s = PyString_AsString(a);
815 end = s + PyString_Size(a);
816 while (s < end) {
817 if (c == *s++)
818 return 1;
819 }
820 return 0;
821}
822
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000824string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000826 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000827 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000829 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 return NULL;
831 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000832 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000833 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000834 if (v == NULL)
835 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000836 else {
837#ifdef COUNT_ALLOCS
838 one_strings++;
839#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000840 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000841 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000842 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843}
844
Martin v. Löwiscd353062001-05-24 16:56:35 +0000845static PyObject*
846string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000848 int c;
849 int len_a, len_b;
850 int min_len;
851 PyObject *result;
852
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000853 /* Make sure both arguments are strings. */
854 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000855 result = Py_NotImplemented;
856 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000857 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000858 if (a == b) {
859 switch (op) {
860 case Py_EQ:case Py_LE:case Py_GE:
861 result = Py_True;
862 goto out;
863 case Py_NE:case Py_LT:case Py_GT:
864 result = Py_False;
865 goto out;
866 }
867 }
868 if (op == Py_EQ) {
869 /* Supporting Py_NE here as well does not save
870 much time, since Py_NE is rarely used. */
871 if (a->ob_size == b->ob_size
872 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000873 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +0000874 a->ob_size) == 0)) {
875 result = Py_True;
876 } else {
877 result = Py_False;
878 }
879 goto out;
880 }
881 len_a = a->ob_size; len_b = b->ob_size;
882 min_len = (len_a < len_b) ? len_a : len_b;
883 if (min_len > 0) {
884 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
885 if (c==0)
886 c = memcmp(a->ob_sval, b->ob_sval, min_len);
887 }else
888 c = 0;
889 if (c == 0)
890 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
891 switch (op) {
892 case Py_LT: c = c < 0; break;
893 case Py_LE: c = c <= 0; break;
894 case Py_EQ: assert(0); break; /* unreachable */
895 case Py_NE: c = c != 0; break;
896 case Py_GT: c = c > 0; break;
897 case Py_GE: c = c >= 0; break;
898 default:
899 result = Py_NotImplemented;
900 goto out;
901 }
902 result = c ? Py_True : Py_False;
903 out:
904 Py_INCREF(result);
905 return result;
906}
907
908int
909_PyString_Eq(PyObject *o1, PyObject *o2)
910{
911 PyStringObject *a, *b;
912 a = (PyStringObject*)o1;
913 b = (PyStringObject*)o2;
914 return a->ob_size == b->ob_size
915 && *a->ob_sval == *b->ob_sval
916 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917}
918
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919static long
Fred Drakeba096332000-07-09 07:04:36 +0000920string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000921{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000922 register int len;
923 register unsigned char *p;
924 register long x;
925
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000926 if (a->ob_shash != -1)
927 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000928 if (a->ob_sinterned != NULL)
929 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000931 len = a->ob_size;
932 p = (unsigned char *) a->ob_sval;
933 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000935 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000936 x ^= a->ob_size;
937 if (x == -1)
938 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000939 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000940 return x;
941}
942
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000943static int
Fred Drakeba096332000-07-09 07:04:36 +0000944string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000945{
946 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000947 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000948 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000949 return -1;
950 }
951 *ptr = (void *)self->ob_sval;
952 return self->ob_size;
953}
954
955static int
Fred Drakeba096332000-07-09 07:04:36 +0000956string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000957{
Guido van Rossum045e6881997-09-08 18:30:11 +0000958 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000959 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000960 return -1;
961}
962
963static int
Fred Drakeba096332000-07-09 07:04:36 +0000964string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000965{
966 if ( lenp )
967 *lenp = self->ob_size;
968 return 1;
969}
970
Guido van Rossum1db70701998-10-08 02:18:52 +0000971static int
Fred Drakeba096332000-07-09 07:04:36 +0000972string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000973{
974 if ( index != 0 ) {
975 PyErr_SetString(PyExc_SystemError,
976 "accessing non-existent string segment");
977 return -1;
978 }
979 *ptr = self->ob_sval;
980 return self->ob_size;
981}
982
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000984 (inquiry)string_length, /*sq_length*/
985 (binaryfunc)string_concat, /*sq_concat*/
986 (intargfunc)string_repeat, /*sq_repeat*/
987 (intargfunc)string_item, /*sq_item*/
988 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000989 0, /*sq_ass_item*/
990 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000991 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992};
993
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000994static PyBufferProcs string_as_buffer = {
995 (getreadbufferproc)string_buffer_getreadbuf,
996 (getwritebufferproc)string_buffer_getwritebuf,
997 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000998 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000999};
1000
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001001
1002
/* Selectors telling the strip helpers which end(s) of the string to trim. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Method names, indexed by the selectors above. */
static const char *stripname[] = {"lstrip", "rstrip", "strip"};
1009
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001010
1011static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001012split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001013{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001014 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001015 PyObject* item;
1016 PyObject *list = PyList_New(0);
1017
1018 if (list == NULL)
1019 return NULL;
1020
Guido van Rossum4c08d552000-03-10 22:55:18 +00001021 for (i = j = 0; i < len; ) {
1022 while (i < len && isspace(Py_CHARMASK(s[i])))
1023 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001024 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001025 while (i < len && !isspace(Py_CHARMASK(s[i])))
1026 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001027 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001028 if (maxsplit-- <= 0)
1029 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001030 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1031 if (item == NULL)
1032 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001033 err = PyList_Append(list, item);
1034 Py_DECREF(item);
1035 if (err < 0)
1036 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001037 while (i < len && isspace(Py_CHARMASK(s[i])))
1038 i++;
1039 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001040 }
1041 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001042 if (j < len) {
1043 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1044 if (item == NULL)
1045 goto finally;
1046 err = PyList_Append(list, item);
1047 Py_DECREF(item);
1048 if (err < 0)
1049 goto finally;
1050 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001051 return list;
1052 finally:
1053 Py_DECREF(list);
1054 return NULL;
1055}
1056
1057
1058static char split__doc__[] =
1059"S.split([sep [,maxsplit]]) -> list of strings\n\
1060\n\
1061Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001062delimiter string. If maxsplit is given, at most maxsplit\n\
1063splits are done. If sep is not specified, any whitespace string\n\
1064is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065
1066static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001067string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001068{
1069 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001070 int maxsplit = -1;
1071 const char *s = PyString_AS_STRING(self), *sub;
1072 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001073
Guido van Rossum4c08d552000-03-10 22:55:18 +00001074 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001075 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001076 if (maxsplit < 0)
1077 maxsplit = INT_MAX;
1078 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001079 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001080 if (PyString_Check(subobj)) {
1081 sub = PyString_AS_STRING(subobj);
1082 n = PyString_GET_SIZE(subobj);
1083 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001084#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001085 else if (PyUnicode_Check(subobj))
1086 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001087#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001088 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1089 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001090 if (n == 0) {
1091 PyErr_SetString(PyExc_ValueError, "empty separator");
1092 return NULL;
1093 }
1094
1095 list = PyList_New(0);
1096 if (list == NULL)
1097 return NULL;
1098
1099 i = j = 0;
1100 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001101 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001102 if (maxsplit-- <= 0)
1103 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001104 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1105 if (item == NULL)
1106 goto fail;
1107 err = PyList_Append(list, item);
1108 Py_DECREF(item);
1109 if (err < 0)
1110 goto fail;
1111 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001112 }
1113 else
1114 i++;
1115 }
1116 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1117 if (item == NULL)
1118 goto fail;
1119 err = PyList_Append(list, item);
1120 Py_DECREF(item);
1121 if (err < 0)
1122 goto fail;
1123
1124 return list;
1125
1126 fail:
1127 Py_DECREF(list);
1128 return NULL;
1129}
1130
1131
1132static char join__doc__[] =
1133"S.join(sequence) -> string\n\
1134\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001135Return a string which is the concatenation of the strings in the\n\
1136sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001137
1138static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001139string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001140{
1141 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001142 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001143 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001144 char *p;
1145 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001146 size_t sz = 0;
1147 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001148 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001149
Tim Peters19fe14e2001-01-19 03:03:47 +00001150 seq = PySequence_Fast(orig, "");
1151 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001152 if (PyErr_ExceptionMatches(PyExc_TypeError))
1153 PyErr_Format(PyExc_TypeError,
1154 "sequence expected, %.80s found",
1155 orig->ob_type->tp_name);
1156 return NULL;
1157 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001158
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001159 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001160 if (seqlen == 0) {
1161 Py_DECREF(seq);
1162 return PyString_FromString("");
1163 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001164 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001165 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001166 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1167 PyErr_Format(PyExc_TypeError,
1168 "sequence item 0: expected string,"
1169 " %.80s found",
1170 item->ob_type->tp_name);
1171 Py_DECREF(seq);
1172 return NULL;
1173 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001174 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001175 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001176 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001177 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001178
Tim Peters19fe14e2001-01-19 03:03:47 +00001179 /* There are at least two things to join. Do a pre-pass to figure out
1180 * the total amount of space we'll need (sz), see whether any argument
1181 * is absurd, and defer to the Unicode join if appropriate.
1182 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001183 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001184 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001185 item = PySequence_Fast_GET_ITEM(seq, i);
1186 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001187#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001188 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001189 /* Defer to Unicode join.
1190 * CAUTION: There's no gurantee that the
1191 * original sequence can be iterated over
1192 * again, so we must pass seq here.
1193 */
1194 PyObject *result;
1195 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001196 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001197 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001198 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001199#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001200 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001201 "sequence item %i: expected string,"
1202 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001203 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001204 Py_DECREF(seq);
1205 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001206 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001207 sz += PyString_GET_SIZE(item);
1208 if (i != 0)
1209 sz += seplen;
1210 if (sz < old_sz || sz > INT_MAX) {
1211 PyErr_SetString(PyExc_OverflowError,
1212 "join() is too long for a Python string");
1213 Py_DECREF(seq);
1214 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001215 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001216 }
1217
1218 /* Allocate result space. */
1219 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1220 if (res == NULL) {
1221 Py_DECREF(seq);
1222 return NULL;
1223 }
1224
1225 /* Catenate everything. */
1226 p = PyString_AS_STRING(res);
1227 for (i = 0; i < seqlen; ++i) {
1228 size_t n;
1229 item = PySequence_Fast_GET_ITEM(seq, i);
1230 n = PyString_GET_SIZE(item);
1231 memcpy(p, PyString_AS_STRING(item), n);
1232 p += n;
1233 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001234 memcpy(p, sep, seplen);
1235 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001236 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001237 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001238
Jeremy Hylton49048292000-07-11 03:28:17 +00001239 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001240 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001241}
1242
Tim Peters52e155e2001-06-16 05:42:57 +00001243PyObject *
1244_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001245{
Tim Petersa7259592001-06-16 05:11:17 +00001246 assert(sep != NULL && PyString_Check(sep));
1247 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001248 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001249}
1250
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001251static long
Fred Drakeba096332000-07-09 07:04:36 +00001252string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001253{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001254 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001255 int len = PyString_GET_SIZE(self);
1256 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001257 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001258
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001259 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001260 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001261 return -2;
1262 if (PyString_Check(subobj)) {
1263 sub = PyString_AS_STRING(subobj);
1264 n = PyString_GET_SIZE(subobj);
1265 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001266#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001267 else if (PyUnicode_Check(subobj))
1268 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001269#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001270 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001271 return -2;
1272
1273 if (last > len)
1274 last = len;
1275 if (last < 0)
1276 last += len;
1277 if (last < 0)
1278 last = 0;
1279 if (i < 0)
1280 i += len;
1281 if (i < 0)
1282 i = 0;
1283
Guido van Rossum4c08d552000-03-10 22:55:18 +00001284 if (dir > 0) {
1285 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001286 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001287 last -= n;
1288 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001289 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001290 return (long)i;
1291 }
1292 else {
1293 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001294
Guido van Rossum4c08d552000-03-10 22:55:18 +00001295 if (n == 0 && i <= last)
1296 return (long)last;
1297 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001298 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001299 return (long)j;
1300 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001301
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001302 return -1;
1303}
1304
1305
1306static char find__doc__[] =
1307"S.find(sub [,start [,end]]) -> int\n\
1308\n\
1309Return the lowest index in S where substring sub is found,\n\
1310such that sub is contained within s[start,end]. Optional\n\
1311arguments start and end are interpreted as in slice notation.\n\
1312\n\
1313Return -1 on failure.";
1314
1315static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001316string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001317{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001318 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319 if (result == -2)
1320 return NULL;
1321 return PyInt_FromLong(result);
1322}
1323
1324
1325static char index__doc__[] =
1326"S.index(sub [,start [,end]]) -> int\n\
1327\n\
1328Like S.find() but raise ValueError when the substring is not found.";
1329
1330static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001331string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001333 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334 if (result == -2)
1335 return NULL;
1336 if (result == -1) {
1337 PyErr_SetString(PyExc_ValueError,
1338 "substring not found in string.index");
1339 return NULL;
1340 }
1341 return PyInt_FromLong(result);
1342}
1343
1344
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345static char rfind__doc__[] =
1346"S.rfind(sub [,start [,end]]) -> int\n\
1347\n\
1348Return the highest index in S where substring sub is found,\n\
1349such that sub is contained within s[start,end]. Optional\n\
1350arguments start and end are interpreted as in slice notation.\n\
1351\n\
1352Return -1 on failure.";
1353
1354static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001355string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001356{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001357 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358 if (result == -2)
1359 return NULL;
1360 return PyInt_FromLong(result);
1361}
1362
1363
1364static char rindex__doc__[] =
1365"S.rindex(sub [,start [,end]]) -> int\n\
1366\n\
1367Like S.rfind() but raise ValueError when the substring is not found.";
1368
1369static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001370string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001372 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001373 if (result == -2)
1374 return NULL;
1375 if (result == -1) {
1376 PyErr_SetString(PyExc_ValueError,
1377 "substring not found in string.rindex");
1378 return NULL;
1379 }
1380 return PyInt_FromLong(result);
1381}
1382
1383
1384static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001385do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1386{
1387 char *s = PyString_AS_STRING(self);
1388 int len = PyString_GET_SIZE(self);
1389 char *sep = PyString_AS_STRING(sepobj);
1390 int seplen = PyString_GET_SIZE(sepobj);
1391 int i, j;
1392
1393 i = 0;
1394 if (striptype != RIGHTSTRIP) {
1395 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1396 i++;
1397 }
1398 }
1399
1400 j = len;
1401 if (striptype != LEFTSTRIP) {
1402 do {
1403 j--;
1404 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1405 j++;
1406 }
1407
1408 if (i == 0 && j == len && PyString_CheckExact(self)) {
1409 Py_INCREF(self);
1410 return (PyObject*)self;
1411 }
1412 else
1413 return PyString_FromStringAndSize(s+i, j-i);
1414}
1415
1416
1417static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001418do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419{
1420 char *s = PyString_AS_STRING(self);
1421 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001423 i = 0;
1424 if (striptype != RIGHTSTRIP) {
1425 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1426 i++;
1427 }
1428 }
1429
1430 j = len;
1431 if (striptype != LEFTSTRIP) {
1432 do {
1433 j--;
1434 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1435 j++;
1436 }
1437
Tim Peters8fa5dd02001-09-12 02:18:30 +00001438 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001439 Py_INCREF(self);
1440 return (PyObject*)self;
1441 }
1442 else
1443 return PyString_FromStringAndSize(s+i, j-i);
1444}
1445
1446
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001447static PyObject *
1448do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1449{
1450 PyObject *sep = NULL;
1451
1452 if (!PyArg_ParseTuple(args, "|O:[lr]strip", &sep))
1453 return NULL;
1454
1455 if (sep != NULL && sep != Py_None) {
1456 /* XXX What about Unicode? */
1457 if (!PyString_Check(sep)) {
1458 PyErr_Format(PyExc_TypeError,
1459 "%s arg must be None or string",
1460 stripname[striptype]);
1461 return NULL;
1462 }
1463 return do_xstrip(self, striptype, sep);
1464 }
1465
1466 return do_strip(self, striptype);
1467}
1468
1469
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001470static char strip__doc__[] =
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001471"S.strip([sep]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001472\n\
1473Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001474whitespace removed.\n\
1475If sep is given and not None, remove characters in sep instead.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001476
1477static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001478string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001479{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001480 if (PyTuple_GET_SIZE(args) == 0)
1481 return do_strip(self, BOTHSTRIP); /* Common case */
1482 else
1483 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484}
1485
1486
1487static char lstrip__doc__[] =
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001488"S.lstrip([sep]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001489\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001490Return a copy of the string S with leading whitespace removed.\n\
1491If sep is given and not None, remove characters in sep instead.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001492
1493static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001494string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001495{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001496 if (PyTuple_GET_SIZE(args) == 0)
1497 return do_strip(self, LEFTSTRIP); /* Common case */
1498 else
1499 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001500}
1501
1502
1503static char rstrip__doc__[] =
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001504"S.rstrip([sep]) -> string\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001505\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001506Return a copy of the string S with trailing whitespace removed.\n\
1507If sep is given and not None, remove characters in sep instead.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001508
1509static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001510string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001511{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001512 if (PyTuple_GET_SIZE(args) == 0)
1513 return do_strip(self, RIGHTSTRIP); /* Common case */
1514 else
1515 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001516}
1517
1518
1519static char lower__doc__[] =
1520"S.lower() -> string\n\
1521\n\
1522Return a copy of the string S converted to lowercase.";
1523
1524static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001525string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001526{
1527 char *s = PyString_AS_STRING(self), *s_new;
1528 int i, n = PyString_GET_SIZE(self);
1529 PyObject *new;
1530
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001531 new = PyString_FromStringAndSize(NULL, n);
1532 if (new == NULL)
1533 return NULL;
1534 s_new = PyString_AsString(new);
1535 for (i = 0; i < n; i++) {
1536 int c = Py_CHARMASK(*s++);
1537 if (isupper(c)) {
1538 *s_new = tolower(c);
1539 } else
1540 *s_new = c;
1541 s_new++;
1542 }
1543 return new;
1544}
1545
1546
1547static char upper__doc__[] =
1548"S.upper() -> string\n\
1549\n\
1550Return a copy of the string S converted to uppercase.";
1551
1552static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001553string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001554{
1555 char *s = PyString_AS_STRING(self), *s_new;
1556 int i, n = PyString_GET_SIZE(self);
1557 PyObject *new;
1558
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001559 new = PyString_FromStringAndSize(NULL, n);
1560 if (new == NULL)
1561 return NULL;
1562 s_new = PyString_AsString(new);
1563 for (i = 0; i < n; i++) {
1564 int c = Py_CHARMASK(*s++);
1565 if (islower(c)) {
1566 *s_new = toupper(c);
1567 } else
1568 *s_new = c;
1569 s_new++;
1570 }
1571 return new;
1572}
1573
1574
Guido van Rossum4c08d552000-03-10 22:55:18 +00001575static char title__doc__[] =
1576"S.title() -> string\n\
1577\n\
1578Return a titlecased version of S, i.e. words start with uppercase\n\
1579characters, all remaining cased characters have lowercase.";
1580
1581static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001582string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001583{
1584 char *s = PyString_AS_STRING(self), *s_new;
1585 int i, n = PyString_GET_SIZE(self);
1586 int previous_is_cased = 0;
1587 PyObject *new;
1588
Guido van Rossum4c08d552000-03-10 22:55:18 +00001589 new = PyString_FromStringAndSize(NULL, n);
1590 if (new == NULL)
1591 return NULL;
1592 s_new = PyString_AsString(new);
1593 for (i = 0; i < n; i++) {
1594 int c = Py_CHARMASK(*s++);
1595 if (islower(c)) {
1596 if (!previous_is_cased)
1597 c = toupper(c);
1598 previous_is_cased = 1;
1599 } else if (isupper(c)) {
1600 if (previous_is_cased)
1601 c = tolower(c);
1602 previous_is_cased = 1;
1603 } else
1604 previous_is_cased = 0;
1605 *s_new++ = c;
1606 }
1607 return new;
1608}
1609
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610static char capitalize__doc__[] =
1611"S.capitalize() -> string\n\
1612\n\
1613Return a copy of the string S with only its first character\n\
1614capitalized.";
1615
1616static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001617string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001618{
1619 char *s = PyString_AS_STRING(self), *s_new;
1620 int i, n = PyString_GET_SIZE(self);
1621 PyObject *new;
1622
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001623 new = PyString_FromStringAndSize(NULL, n);
1624 if (new == NULL)
1625 return NULL;
1626 s_new = PyString_AsString(new);
1627 if (0 < n) {
1628 int c = Py_CHARMASK(*s++);
1629 if (islower(c))
1630 *s_new = toupper(c);
1631 else
1632 *s_new = c;
1633 s_new++;
1634 }
1635 for (i = 1; i < n; i++) {
1636 int c = Py_CHARMASK(*s++);
1637 if (isupper(c))
1638 *s_new = tolower(c);
1639 else
1640 *s_new = c;
1641 s_new++;
1642 }
1643 return new;
1644}
1645
1646
1647static char count__doc__[] =
1648"S.count(sub[, start[, end]]) -> int\n\
1649\n\
1650Return the number of occurrences of substring sub in string\n\
1651S[start:end]. Optional arguments start and end are\n\
1652interpreted as in slice notation.";
1653
1654static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001655string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001656{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001657 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001658 int len = PyString_GET_SIZE(self), n;
1659 int i = 0, last = INT_MAX;
1660 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001661 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001662
Guido van Rossumc6821402000-05-08 14:08:05 +00001663 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1664 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001665 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001666
Guido van Rossum4c08d552000-03-10 22:55:18 +00001667 if (PyString_Check(subobj)) {
1668 sub = PyString_AS_STRING(subobj);
1669 n = PyString_GET_SIZE(subobj);
1670 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001671#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001672 else if (PyUnicode_Check(subobj)) {
1673 int count;
1674 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1675 if (count == -1)
1676 return NULL;
1677 else
1678 return PyInt_FromLong((long) count);
1679 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001680#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1682 return NULL;
1683
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001684 if (last > len)
1685 last = len;
1686 if (last < 0)
1687 last += len;
1688 if (last < 0)
1689 last = 0;
1690 if (i < 0)
1691 i += len;
1692 if (i < 0)
1693 i = 0;
1694 m = last + 1 - n;
1695 if (n == 0)
1696 return PyInt_FromLong((long) (m-i));
1697
1698 r = 0;
1699 while (i < m) {
1700 if (!memcmp(s+i, sub, n)) {
1701 r++;
1702 i += n;
1703 } else {
1704 i++;
1705 }
1706 }
1707 return PyInt_FromLong((long) r);
1708}
1709
1710
1711static char swapcase__doc__[] =
1712"S.swapcase() -> string\n\
1713\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001714Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001715converted to lowercase and vice versa.";
1716
1717static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001718string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719{
1720 char *s = PyString_AS_STRING(self), *s_new;
1721 int i, n = PyString_GET_SIZE(self);
1722 PyObject *new;
1723
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001724 new = PyString_FromStringAndSize(NULL, n);
1725 if (new == NULL)
1726 return NULL;
1727 s_new = PyString_AsString(new);
1728 for (i = 0; i < n; i++) {
1729 int c = Py_CHARMASK(*s++);
1730 if (islower(c)) {
1731 *s_new = toupper(c);
1732 }
1733 else if (isupper(c)) {
1734 *s_new = tolower(c);
1735 }
1736 else
1737 *s_new = c;
1738 s_new++;
1739 }
1740 return new;
1741}
1742
1743
1744static char translate__doc__[] =
1745"S.translate(table [,deletechars]) -> string\n\
1746\n\
1747Return a copy of the string S, where all characters occurring\n\
1748in the optional argument deletechars are removed, and the\n\
1749remaining characters have been mapped through the given\n\
1750translation table, which must be a string of length 256.";
1751
1752static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001753string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 register char *input, *output;
1756 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757 register int i, c, changed = 0;
1758 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001759 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001760 int inlen, tablen, dellen = 0;
1761 PyObject *result;
1762 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001763 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001764
Guido van Rossum4c08d552000-03-10 22:55:18 +00001765 if (!PyArg_ParseTuple(args, "O|O:translate",
1766 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001768
1769 if (PyString_Check(tableobj)) {
1770 table1 = PyString_AS_STRING(tableobj);
1771 tablen = PyString_GET_SIZE(tableobj);
1772 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001773#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001774 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001775 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001776 parameter; instead a mapping to None will cause characters
1777 to be deleted. */
1778 if (delobj != NULL) {
1779 PyErr_SetString(PyExc_TypeError,
1780 "deletions are implemented differently for unicode");
1781 return NULL;
1782 }
1783 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1784 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001785#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001786 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001787 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001788
1789 if (delobj != NULL) {
1790 if (PyString_Check(delobj)) {
1791 del_table = PyString_AS_STRING(delobj);
1792 dellen = PyString_GET_SIZE(delobj);
1793 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001794#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001795 else if (PyUnicode_Check(delobj)) {
1796 PyErr_SetString(PyExc_TypeError,
1797 "deletions are implemented differently for unicode");
1798 return NULL;
1799 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001800#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001801 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1802 return NULL;
1803
1804 if (tablen != 256) {
1805 PyErr_SetString(PyExc_ValueError,
1806 "translation table must be 256 characters long");
1807 return NULL;
1808 }
1809 }
1810 else {
1811 del_table = NULL;
1812 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813 }
1814
1815 table = table1;
1816 inlen = PyString_Size(input_obj);
1817 result = PyString_FromStringAndSize((char *)NULL, inlen);
1818 if (result == NULL)
1819 return NULL;
1820 output_start = output = PyString_AsString(result);
1821 input = PyString_AsString(input_obj);
1822
1823 if (dellen == 0) {
1824 /* If no deletions are required, use faster code */
1825 for (i = inlen; --i >= 0; ) {
1826 c = Py_CHARMASK(*input++);
1827 if (Py_CHARMASK((*output++ = table[c])) != c)
1828 changed = 1;
1829 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001830 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 return result;
1832 Py_DECREF(result);
1833 Py_INCREF(input_obj);
1834 return input_obj;
1835 }
1836
1837 for (i = 0; i < 256; i++)
1838 trans_table[i] = Py_CHARMASK(table[i]);
1839
1840 for (i = 0; i < dellen; i++)
1841 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1842
1843 for (i = inlen; --i >= 0; ) {
1844 c = Py_CHARMASK(*input++);
1845 if (trans_table[c] != -1)
1846 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1847 continue;
1848 changed = 1;
1849 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001850 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851 Py_DECREF(result);
1852 Py_INCREF(input_obj);
1853 return input_obj;
1854 }
1855 /* Fix the size of the resulting string */
1856 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1857 return NULL;
1858 return result;
1859}
1860
1861
1862/* What follows is used for implementing replace(). Perry Stoll. */
1863
/*
  mymemfind

  strstr-style search over arbitrary blocks of memory (no terminating
  NUL is required).

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM.  Returns the index into MEM of that occurrence, or
  -1 when there is none; in particular -1 is returned whenever PAT is
  longer than MEM.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* Highest index at which a complete copy of PAT could still start;
       the pattern can not begin in the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Cheap first-byte test before paying for the full compare. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1889
/*
  mymemcnt

  Return how many non-overlapping copies of PAT occur in MEM, scanning
  left to right and resuming the search just past each match.
  So mem=1111 with pat=11 gives 2,
  and mem=11111 with pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits;

    for (hits = 0; len >= 0; hits++) {
        int consumed;
        const int where = mymemfind(mem, len, pat, pat_len);

        if (where == -1)
            break;
        /* Drop everything up to and including this match. */
        consumed = where + pat_len;
        mem += consumed;
        len -= consumed;
    }
    return hits;
}
1913
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "replace all".

   If STR is empty, PAT is longer than STR, or no replacement would be
   made, the original string is returned.  Otherwise, a new string is
   allocated here (with PyMem_MALLOC) and returned; the caller owns it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would
       not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* When the substitute is longer than the pattern the result grows
       by (sub_len - pat_len) per replacement.  Refuse sizes that do not
       fit in an int: a wrapped new_len would under-allocate the buffer
       that the memcpy() calls below then overrun. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;

    new_len = len + nfound*(sub_len - pat_len);
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1998
1999
2000static char replace__doc__[] =
2001"S.replace (old, new[, maxsplit]) -> string\n\
2002\n\
2003Return a copy of string S with all occurrences of substring\n\
2004old replaced by new. If the optional argument maxsplit is\n\
2005given, only the first maxsplit occurrences are replaced.";
2006
2007static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002008string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002009{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002010 const char *str = PyString_AS_STRING(self), *sub, *repl;
2011 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002012 const int len = PyString_GET_SIZE(self);
2013 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002014 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002015 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002016 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002017
Guido van Rossum4c08d552000-03-10 22:55:18 +00002018 if (!PyArg_ParseTuple(args, "OO|i:replace",
2019 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002020 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002021
2022 if (PyString_Check(subobj)) {
2023 sub = PyString_AS_STRING(subobj);
2024 sub_len = PyString_GET_SIZE(subobj);
2025 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002026#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002027 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002028 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002029 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002030#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002031 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2032 return NULL;
2033
2034 if (PyString_Check(replobj)) {
2035 repl = PyString_AS_STRING(replobj);
2036 repl_len = PyString_GET_SIZE(replobj);
2037 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002038#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002039 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002040 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002041 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002042#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2044 return NULL;
2045
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002046 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002047 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002048 return NULL;
2049 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002050 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002051 if (new_s == NULL) {
2052 PyErr_NoMemory();
2053 return NULL;
2054 }
2055 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002056 if (PyString_CheckExact(self)) {
2057 /* we're returning another reference to self */
2058 new = (PyObject*)self;
2059 Py_INCREF(new);
2060 }
2061 else {
2062 new = PyString_FromStringAndSize(str, len);
2063 if (new == NULL)
2064 return NULL;
2065 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002066 }
2067 else {
2068 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002069 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002070 }
2071 return new;
2072}
2073
2074
2075static char startswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002076"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002077\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002078Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002079optional start, test S beginning at that position. With optional end, stop\n\
2080comparing S at that position.";
2081
2082static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002083string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002084{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002085 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002087 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002088 int plen;
2089 int start = 0;
2090 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002091 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002092
Guido van Rossumc6821402000-05-08 14:08:05 +00002093 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2094 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002095 return NULL;
2096 if (PyString_Check(subobj)) {
2097 prefix = PyString_AS_STRING(subobj);
2098 plen = PyString_GET_SIZE(subobj);
2099 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002100#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002101 else if (PyUnicode_Check(subobj)) {
2102 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002103 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002104 subobj, start, end, -1);
2105 if (rc == -1)
2106 return NULL;
2107 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002108 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002109 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002110#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002112 return NULL;
2113
2114 /* adopt Java semantics for index out of range. it is legal for
2115 * offset to be == plen, but this only returns true if prefix is
2116 * the empty string.
2117 */
2118 if (start < 0 || start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002119 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002120
2121 if (!memcmp(str+start, prefix, plen)) {
2122 /* did the match end after the specified end? */
2123 if (end < 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002124 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002125 else if (end - start < plen)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002126 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002127 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002128 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002129 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002130 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002131}
2132
2133
2134static char endswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002135"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002137Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002138optional start, test S beginning at that position. With optional end, stop\n\
2139comparing S at that position.";
2140
2141static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002142string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002144 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002146 const char* suffix;
2147 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002148 int start = 0;
2149 int end = -1;
2150 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002151 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152
Guido van Rossumc6821402000-05-08 14:08:05 +00002153 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2154 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002155 return NULL;
2156 if (PyString_Check(subobj)) {
2157 suffix = PyString_AS_STRING(subobj);
2158 slen = PyString_GET_SIZE(subobj);
2159 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002160#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002161 else if (PyUnicode_Check(subobj)) {
2162 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002163 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002164 subobj, start, end, +1);
2165 if (rc == -1)
2166 return NULL;
2167 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002168 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002169 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002170#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002172 return NULL;
2173
Guido van Rossum4c08d552000-03-10 22:55:18 +00002174 if (start < 0 || start > len || slen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002175 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002176
2177 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002178 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002179
Guido van Rossum4c08d552000-03-10 22:55:18 +00002180 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002181 return PyBool_FromLong(1);
2182 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002183}
2184
2185
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002186static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002187"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002188\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002189Encodes S using the codec registered for encoding. encoding defaults\n\
2190to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002191handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2192a ValueError. Other possible values are 'ignore' and 'replace'.";
2193
2194static PyObject *
2195string_encode(PyStringObject *self, PyObject *args)
2196{
2197 char *encoding = NULL;
2198 char *errors = NULL;
2199 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2200 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002201 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2202}
2203
2204
2205static char decode__doc__[] =
2206"S.decode([encoding[,errors]]) -> object\n\
2207\n\
2208Decodes S using the codec registered for encoding. encoding defaults\n\
2209to the default encoding. errors may be given to set a different error\n\
2210handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2211a ValueError. Other possible values are 'ignore' and 'replace'.";
2212
2213static PyObject *
2214string_decode(PyStringObject *self, PyObject *args)
2215{
2216 char *encoding = NULL;
2217 char *errors = NULL;
2218 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2219 return NULL;
2220 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002221}
2222
2223
Guido van Rossum4c08d552000-03-10 22:55:18 +00002224static char expandtabs__doc__[] =
2225"S.expandtabs([tabsize]) -> string\n\
2226\n\
2227Return a copy of S where all tab characters are expanded using spaces.\n\
2228If tabsize is not given, a tab size of 8 characters is assumed.";
2229
2230static PyObject*
2231string_expandtabs(PyStringObject *self, PyObject *args)
2232{
2233 const char *e, *p;
2234 char *q;
2235 int i, j;
2236 PyObject *u;
2237 int tabsize = 8;
2238
2239 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2240 return NULL;
2241
Thomas Wouters7e474022000-07-16 12:04:32 +00002242 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002243 i = j = 0;
2244 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2245 for (p = PyString_AS_STRING(self); p < e; p++)
2246 if (*p == '\t') {
2247 if (tabsize > 0)
2248 j += tabsize - (j % tabsize);
2249 }
2250 else {
2251 j++;
2252 if (*p == '\n' || *p == '\r') {
2253 i += j;
2254 j = 0;
2255 }
2256 }
2257
2258 /* Second pass: create output string and fill it */
2259 u = PyString_FromStringAndSize(NULL, i + j);
2260 if (!u)
2261 return NULL;
2262
2263 j = 0;
2264 q = PyString_AS_STRING(u);
2265
2266 for (p = PyString_AS_STRING(self); p < e; p++)
2267 if (*p == '\t') {
2268 if (tabsize > 0) {
2269 i = tabsize - (j % tabsize);
2270 j += i;
2271 while (i--)
2272 *q++ = ' ';
2273 }
2274 }
2275 else {
2276 j++;
2277 *q++ = *p;
2278 if (*p == '\n' || *p == '\r')
2279 j = 0;
2280 }
2281
2282 return u;
2283}
2284
Tim Peters8fa5dd02001-09-12 02:18:30 +00002285static PyObject *
2286pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287{
2288 PyObject *u;
2289
2290 if (left < 0)
2291 left = 0;
2292 if (right < 0)
2293 right = 0;
2294
Tim Peters8fa5dd02001-09-12 02:18:30 +00002295 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296 Py_INCREF(self);
2297 return (PyObject *)self;
2298 }
2299
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002300 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301 left + PyString_GET_SIZE(self) + right);
2302 if (u) {
2303 if (left)
2304 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002305 memcpy(PyString_AS_STRING(u) + left,
2306 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 PyString_GET_SIZE(self));
2308 if (right)
2309 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2310 fill, right);
2311 }
2312
2313 return u;
2314}
2315
2316static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002317"S.ljust(width) -> string\n"
2318"\n"
2319"Return S left justified in a string of length width. Padding is\n"
2320"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002321
2322static PyObject *
2323string_ljust(PyStringObject *self, PyObject *args)
2324{
2325 int width;
2326 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2327 return NULL;
2328
Tim Peters8fa5dd02001-09-12 02:18:30 +00002329 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002330 Py_INCREF(self);
2331 return (PyObject*) self;
2332 }
2333
2334 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2335}
2336
2337
2338static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002339"S.rjust(width) -> string\n"
2340"\n"
2341"Return S right justified in a string of length width. Padding is\n"
2342"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002343
2344static PyObject *
2345string_rjust(PyStringObject *self, PyObject *args)
2346{
2347 int width;
2348 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2349 return NULL;
2350
Tim Peters8fa5dd02001-09-12 02:18:30 +00002351 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002352 Py_INCREF(self);
2353 return (PyObject*) self;
2354 }
2355
2356 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2357}
2358
2359
2360static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002361"S.center(width) -> string\n"
2362"\n"
2363"Return S centered in a string of length width. Padding is done\n"
2364"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365
2366static PyObject *
2367string_center(PyStringObject *self, PyObject *args)
2368{
2369 int marg, left;
2370 int width;
2371
2372 if (!PyArg_ParseTuple(args, "i:center", &width))
2373 return NULL;
2374
Tim Peters8fa5dd02001-09-12 02:18:30 +00002375 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002376 Py_INCREF(self);
2377 return (PyObject*) self;
2378 }
2379
2380 marg = width - PyString_GET_SIZE(self);
2381 left = marg / 2 + (marg & width & 1);
2382
2383 return pad(self, left, marg - left, ' ');
2384}
2385
Walter Dörwald068325e2002-04-15 13:36:47 +00002386static char zfill__doc__[] =
2387"S.zfill(width) -> string\n"
2388"\n"
2389"Pad a numeric string S with zeros on the left, to fill a field\n"
2390"of the specified width. The string S is never truncated.";
2391
2392static PyObject *
2393string_zfill(PyStringObject *self, PyObject *args)
2394{
2395 int fill;
2396 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002397 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002398
2399 int width;
2400 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2401 return NULL;
2402
2403 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002404 if (PyString_CheckExact(self)) {
2405 Py_INCREF(self);
2406 return (PyObject*) self;
2407 }
2408 else
2409 return PyString_FromStringAndSize(
2410 PyString_AS_STRING(self),
2411 PyString_GET_SIZE(self)
2412 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002413 }
2414
2415 fill = width - PyString_GET_SIZE(self);
2416
2417 s = pad(self, fill, 0, '0');
2418
2419 if (s == NULL)
2420 return NULL;
2421
2422 p = PyString_AS_STRING(s);
2423 if (p[fill] == '+' || p[fill] == '-') {
2424 /* move sign to beginning of string */
2425 p[0] = p[fill];
2426 p[fill] = '0';
2427 }
2428
2429 return (PyObject*) s;
2430}
2431
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432static char isspace__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002433"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002434"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002435"Return True if there are only whitespace characters in S,\n"
2436"False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002437
2438static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002439string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002440{
Fred Drakeba096332000-07-09 07:04:36 +00002441 register const unsigned char *p
2442 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002443 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444
Guido van Rossum4c08d552000-03-10 22:55:18 +00002445 /* Shortcut for single character strings */
2446 if (PyString_GET_SIZE(self) == 1 &&
2447 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002448 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002450 /* Special case for empty strings */
2451 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002452 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002453
Guido van Rossum4c08d552000-03-10 22:55:18 +00002454 e = p + PyString_GET_SIZE(self);
2455 for (; p < e; p++) {
2456 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002457 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002458 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002459 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002460}
2461
2462
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002463static char isalpha__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002464"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002465\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002466Return True if all characters in S are alphabetic\n\
2467and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002468
2469static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002470string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002471{
Fred Drakeba096332000-07-09 07:04:36 +00002472 register const unsigned char *p
2473 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002474 register const unsigned char *e;
2475
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002476 /* Shortcut for single character strings */
2477 if (PyString_GET_SIZE(self) == 1 &&
2478 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002479 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002480
2481 /* Special case for empty strings */
2482 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002483 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002484
2485 e = p + PyString_GET_SIZE(self);
2486 for (; p < e; p++) {
2487 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002488 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002489 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002490 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002491}
2492
2493
2494static char isalnum__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002495"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002496\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002497Return True if all characters in S are alphanumeric\n\
2498and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002499
2500static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002501string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002502{
Fred Drakeba096332000-07-09 07:04:36 +00002503 register const unsigned char *p
2504 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002505 register const unsigned char *e;
2506
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002507 /* Shortcut for single character strings */
2508 if (PyString_GET_SIZE(self) == 1 &&
2509 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002510 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002511
2512 /* Special case for empty strings */
2513 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002514 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002515
2516 e = p + PyString_GET_SIZE(self);
2517 for (; p < e; p++) {
2518 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002519 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002520 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002521 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002522}
2523
2524
Guido van Rossum4c08d552000-03-10 22:55:18 +00002525static char isdigit__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002526"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002527\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002528Return True if there are only digit characters in S,\n\
2529False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002530
2531static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002532string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002533{
Fred Drakeba096332000-07-09 07:04:36 +00002534 register const unsigned char *p
2535 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002536 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002537
Guido van Rossum4c08d552000-03-10 22:55:18 +00002538 /* Shortcut for single character strings */
2539 if (PyString_GET_SIZE(self) == 1 &&
2540 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002541 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002542
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002543 /* Special case for empty strings */
2544 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002545 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002546
Guido van Rossum4c08d552000-03-10 22:55:18 +00002547 e = p + PyString_GET_SIZE(self);
2548 for (; p < e; p++) {
2549 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002550 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002551 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002552 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002553}
2554
2555
2556static char islower__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002557"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002559Return True if all cased characters in S are lowercase and there is\n\
2560at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002561
2562static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002563string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002564{
Fred Drakeba096332000-07-09 07:04:36 +00002565 register const unsigned char *p
2566 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002567 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002568 int cased;
2569
Guido van Rossum4c08d552000-03-10 22:55:18 +00002570 /* Shortcut for single character strings */
2571 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002572 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002573
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002574 /* Special case for empty strings */
2575 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002576 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002577
Guido van Rossum4c08d552000-03-10 22:55:18 +00002578 e = p + PyString_GET_SIZE(self);
2579 cased = 0;
2580 for (; p < e; p++) {
2581 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002582 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002583 else if (!cased && islower(*p))
2584 cased = 1;
2585 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002586 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002587}
2588
2589
2590static char isupper__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002591"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002592\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002593Return True if all cased characters in S are uppercase and there is\n\
2594at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002595
2596static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002597string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002598{
Fred Drakeba096332000-07-09 07:04:36 +00002599 register const unsigned char *p
2600 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002601 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002602 int cased;
2603
Guido van Rossum4c08d552000-03-10 22:55:18 +00002604 /* Shortcut for single character strings */
2605 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002606 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002607
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002608 /* Special case for empty strings */
2609 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002610 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002611
Guido van Rossum4c08d552000-03-10 22:55:18 +00002612 e = p + PyString_GET_SIZE(self);
2613 cased = 0;
2614 for (; p < e; p++) {
2615 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002616 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002617 else if (!cased && isupper(*p))
2618 cased = 1;
2619 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002620 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002621}
2622
2623
2624static char istitle__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002625"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002626\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002627Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628may only follow uncased characters and lowercase characters only cased\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002629ones. Return False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002630
2631static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002632string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633{
Fred Drakeba096332000-07-09 07:04:36 +00002634 register const unsigned char *p
2635 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002636 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002637 int cased, previous_is_cased;
2638
Guido van Rossum4c08d552000-03-10 22:55:18 +00002639 /* Shortcut for single character strings */
2640 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002641 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002642
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002643 /* Special case for empty strings */
2644 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002645 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002646
Guido van Rossum4c08d552000-03-10 22:55:18 +00002647 e = p + PyString_GET_SIZE(self);
2648 cased = 0;
2649 previous_is_cased = 0;
2650 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002651 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002652
2653 if (isupper(ch)) {
2654 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002655 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002656 previous_is_cased = 1;
2657 cased = 1;
2658 }
2659 else if (islower(ch)) {
2660 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002661 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002662 previous_is_cased = 1;
2663 cased = 1;
2664 }
2665 else
2666 previous_is_cased = 0;
2667 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002668 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002669}
2670
2671
2672static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002673"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002674\n\
2675Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002676Line breaks are not included in the resulting list unless keepends\n\
2677is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002678
2679#define SPLIT_APPEND(data, left, right) \
2680 str = PyString_FromStringAndSize(data + left, right - left); \
2681 if (!str) \
2682 goto onError; \
2683 if (PyList_Append(list, str)) { \
2684 Py_DECREF(str); \
2685 goto onError; \
2686 } \
2687 else \
2688 Py_DECREF(str);
2689
2690static PyObject*
2691string_splitlines(PyStringObject *self, PyObject *args)
2692{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002693 register int i;
2694 register int j;
2695 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002696 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002697 PyObject *list;
2698 PyObject *str;
2699 char *data;
2700
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002701 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002702 return NULL;
2703
2704 data = PyString_AS_STRING(self);
2705 len = PyString_GET_SIZE(self);
2706
Guido van Rossum4c08d552000-03-10 22:55:18 +00002707 list = PyList_New(0);
2708 if (!list)
2709 goto onError;
2710
2711 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002712 int eol;
2713
Guido van Rossum4c08d552000-03-10 22:55:18 +00002714 /* Find a line and append it */
2715 while (i < len && data[i] != '\n' && data[i] != '\r')
2716 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002717
2718 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002719 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002720 if (i < len) {
2721 if (data[i] == '\r' && i + 1 < len &&
2722 data[i+1] == '\n')
2723 i += 2;
2724 else
2725 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002726 if (keepends)
2727 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002728 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002729 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002730 j = i;
2731 }
2732 if (j < len) {
2733 SPLIT_APPEND(data, j, len);
2734 }
2735
2736 return list;
2737
2738 onError:
2739 Py_DECREF(list);
2740 return NULL;
2741}
2742
2743#undef SPLIT_APPEND
2744
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002745
/* Method table for the string type.  Each entry gives the method name,
   the C function implementing it, a METH_* calling-convention flag and
   the docstring array; the table ends with an all-NULL sentinel.
   PyString_Type points at this table through its tp_methods slot. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {NULL, NULL} /* sentinel */
};
2793
Guido van Rossumae960af2001-08-30 03:11:59 +00002794staticforward PyObject *
2795str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2796
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002797static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002798string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002799{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002800 PyObject *x = NULL;
2801 static char *kwlist[] = {"object", 0};
2802
Guido van Rossumae960af2001-08-30 03:11:59 +00002803 if (type != &PyString_Type)
2804 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002805 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2806 return NULL;
2807 if (x == NULL)
2808 return PyString_FromString("");
2809 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002810}
2811
Guido van Rossumae960af2001-08-30 03:11:59 +00002812static PyObject *
2813str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2814{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002815 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002816 int n;
2817
2818 assert(PyType_IsSubtype(type, &PyString_Type));
2819 tmp = string_new(&PyString_Type, args, kwds);
2820 if (tmp == NULL)
2821 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002822 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002823 n = PyString_GET_SIZE(tmp);
2824 pnew = type->tp_alloc(type, n);
2825 if (pnew != NULL) {
2826 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002827 ((PyStringObject *)pnew)->ob_shash =
2828 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002829 ((PyStringObject *)pnew)->ob_sinterned =
2830 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002831 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002832 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002833 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002834}
2835
/* Docstring stored in the tp_doc slot of PyString_Type. */
static char string_doc[] =
"str(object) -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.";

/* Type object for str.  The initializer is positional: every value must
   stay in the slot order of PyTypeObject, so unused slots are spelled
   out as 0. */
PyTypeObject PyString_Type = {
    PyObject_HEAD_INIT(&PyType_Type)
    0,                                          /* ob_size */
    "str",                                      /* tp_name */
    sizeof(PyStringObject),                     /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    (destructor)string_dealloc,                 /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    (reprfunc)string_repr,                      /* tp_repr */
    0,                                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    (reprfunc)string_str,                       /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    /* Py_TPFLAGS_BASETYPE: the type may be subclassed. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,   /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    0,                                          /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
2884
2885void
Fred Drakeba096332000-07-09 07:04:36 +00002886PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002887{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002888 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002889 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002890 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002891 if (w == NULL || !PyString_Check(*pv)) {
2892 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002893 *pv = NULL;
2894 return;
2895 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002896 v = string_concat((PyStringObject *) *pv, w);
2897 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002898 *pv = v;
2899}
2900
Guido van Rossum013142a1994-08-30 08:19:36 +00002901void
Fred Drakeba096332000-07-09 07:04:36 +00002902PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002903{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002904 PyString_Concat(pv, w);
2905 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002906}
2907
2908
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002909/* The following function breaks the notion that strings are immutable:
2910 it changes the size of a string. We get away with this only if there
2911 is only one module referencing the object. You can also think of it
2912 as creating a new string object and destroying the old one, only
2913 more efficiently. In any case, don't use this if the string may
2914 already be known to some other part of the code... */
2915
2916int
Fred Drakeba096332000-07-09 07:04:36 +00002917_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002918{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002919 register PyObject *v;
2920 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002921 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002922 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002923 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002924 Py_DECREF(v);
2925 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002926 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002927 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002928 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002929#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002930 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002931#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002932 _Py_ForgetReference(v);
2933 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00002934 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002935 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002936 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00002937 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002938 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002939 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002940 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002941 _Py_NewReference(*pv);
2942 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002943 sv->ob_size = newsize;
2944 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002945 return 0;
2946}
Guido van Rossume5372401993-03-16 12:15:04 +00002947
2948/* Helpers for formatstring */
2949
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002950static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002951getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002952{
2953 int argidx = *p_argidx;
2954 if (argidx < arglen) {
2955 (*p_argidx)++;
2956 if (arglen < 0)
2957 return args;
2958 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002959 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002960 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002961 PyErr_SetString(PyExc_TypeError,
2962 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002963 return NULL;
2964}
2965
/* Format flags: one bit per flag character of a format specifier.
 *
 *   flag      character
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
2978
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002979static int
Fred Drakeba096332000-07-09 07:04:36 +00002980formatfloat(char *buf, size_t buflen, int flags,
2981 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002982{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002983 /* fmt = '%#.' + `prec` + `type`
2984 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002985 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002986 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002987 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002988 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002989 if (prec < 0)
2990 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002991 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2992 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00002993 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
2994 (flags&F_ALT) ? "#" : "",
2995 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002996 /* worst case length calc to ensure no buffer overrun:
2997 fmt = %#.<prec>g
2998 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002999 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003000 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3001 If prec=0 the effective precision is 1 (the leading digit is
3002 always given), therefore increase by one to 10+prec. */
3003 if (buflen <= (size_t)10 + (size_t)prec) {
3004 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003005 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003006 return -1;
3007 }
Tim Peters885d4572001-11-28 20:27:42 +00003008 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003009 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003010}
3011
Tim Peters38fd5b62000-09-21 05:43:11 +00003012/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3013 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3014 * Python's regular ints.
3015 * Return value: a new PyString*, or NULL if error.
3016 * . *pbuf is set to point into it,
3017 * *plen set to the # of chars following that.
3018 * Caller must decref it when done using pbuf.
3019 * The string starting at *pbuf is of the form
3020 * "-"? ("0x" | "0X")? digit+
3021 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003022 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003023 * There will be at least prec digits, zero-filled on the left if
3024 * necessary to get that many.
3025 * val object to be converted
3026 * flags bitmask of format flags; only F_ALT is looked at
3027 * prec minimum number of digits; 0-fill on left if needed
3028 * type a character in [duoxX]; u acts the same as d
3029 *
3030 * CAUTION: o, x and X conversions on regular ints can never
3031 * produce a '-' sign, but can for Python's unbounded ints.
3032 */
3033PyObject*
3034_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3035 char **pbuf, int *plen)
3036{
3037 PyObject *result = NULL;
3038 char *buf;
3039 int i;
3040 int sign; /* 1 if '-', else 0 */
3041 int len; /* number of characters */
3042 int numdigits; /* len == numnondigits + numdigits */
3043 int numnondigits = 0;
3044
3045 switch (type) {
3046 case 'd':
3047 case 'u':
3048 result = val->ob_type->tp_str(val);
3049 break;
3050 case 'o':
3051 result = val->ob_type->tp_as_number->nb_oct(val);
3052 break;
3053 case 'x':
3054 case 'X':
3055 numnondigits = 2;
3056 result = val->ob_type->tp_as_number->nb_hex(val);
3057 break;
3058 default:
3059 assert(!"'type' not in [duoxX]");
3060 }
3061 if (!result)
3062 return NULL;
3063
3064 /* To modify the string in-place, there can only be one reference. */
3065 if (result->ob_refcnt != 1) {
3066 PyErr_BadInternalCall();
3067 return NULL;
3068 }
3069 buf = PyString_AsString(result);
3070 len = PyString_Size(result);
3071 if (buf[len-1] == 'L') {
3072 --len;
3073 buf[len] = '\0';
3074 }
3075 sign = buf[0] == '-';
3076 numnondigits += sign;
3077 numdigits = len - numnondigits;
3078 assert(numdigits > 0);
3079
Tim Petersfff53252001-04-12 18:38:48 +00003080 /* Get rid of base marker unless F_ALT */
3081 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003082 /* Need to skip 0x, 0X or 0. */
3083 int skipped = 0;
3084 switch (type) {
3085 case 'o':
3086 assert(buf[sign] == '0');
3087 /* If 0 is only digit, leave it alone. */
3088 if (numdigits > 1) {
3089 skipped = 1;
3090 --numdigits;
3091 }
3092 break;
3093 case 'x':
3094 case 'X':
3095 assert(buf[sign] == '0');
3096 assert(buf[sign + 1] == 'x');
3097 skipped = 2;
3098 numnondigits -= 2;
3099 break;
3100 }
3101 if (skipped) {
3102 buf += skipped;
3103 len -= skipped;
3104 if (sign)
3105 buf[0] = '-';
3106 }
3107 assert(len == numnondigits + numdigits);
3108 assert(numdigits > 0);
3109 }
3110
3111 /* Fill with leading zeroes to meet minimum width. */
3112 if (prec > numdigits) {
3113 PyObject *r1 = PyString_FromStringAndSize(NULL,
3114 numnondigits + prec);
3115 char *b1;
3116 if (!r1) {
3117 Py_DECREF(result);
3118 return NULL;
3119 }
3120 b1 = PyString_AS_STRING(r1);
3121 for (i = 0; i < numnondigits; ++i)
3122 *b1++ = *buf++;
3123 for (i = 0; i < prec - numdigits; i++)
3124 *b1++ = '0';
3125 for (i = 0; i < numdigits; i++)
3126 *b1++ = *buf++;
3127 *b1 = '\0';
3128 Py_DECREF(result);
3129 result = r1;
3130 buf = PyString_AS_STRING(result);
3131 len = numnondigits + prec;
3132 }
3133
3134 /* Fix up case for hex conversions. */
3135 switch (type) {
3136 case 'x':
3137 /* Need to convert all upper case letters to lower case. */
3138 for (i = 0; i < len; i++)
3139 if (buf[i] >= 'A' && buf[i] <= 'F')
3140 buf[i] += 'a'-'A';
3141 break;
3142 case 'X':
3143 /* Need to convert 0x to 0X (and -0x to -0X). */
3144 if (buf[sign + 1] == 'x')
3145 buf[sign + 1] = 'X';
3146 break;
3147 }
3148 *pbuf = buf;
3149 *plen = len;
3150 return result;
3151}
3152
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003153static int
Fred Drakeba096332000-07-09 07:04:36 +00003154formatint(char *buf, size_t buflen, int flags,
3155 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003156{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003157 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003158 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3159 + 1 + 1 = 24 */
3160 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003161 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003162
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003163 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003164 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003165 if (prec < 0)
3166 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003167
3168 if ((flags & F_ALT) &&
3169 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003170 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003171 * of issues that cause pain:
3172 * - when 0 is being converted, the C standard leaves off
3173 * the '0x' or '0X', which is inconsistent with other
3174 * %#x/%#X conversions and inconsistent with Python's
3175 * hex() function
3176 * - there are platforms that violate the standard and
3177 * convert 0 with the '0x' or '0X'
3178 * (Metrowerks, Compaq Tru64)
3179 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003180 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003181 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003182 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003183 * We can achieve the desired consistency by inserting our
3184 * own '0x' or '0X' prefix, and substituting %x/%X in place
3185 * of %#x/%#X.
3186 *
3187 * Note that this is the same approach as used in
3188 * formatint() in unicodeobject.c
3189 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003190 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003191 type, prec, type);
3192 }
3193 else {
3194 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003195 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003196 prec, type);
3197 }
3198
Tim Peters38fd5b62000-09-21 05:43:11 +00003199 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003200 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3201 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003202 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003203 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003204 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003205 return -1;
3206 }
Tim Peters885d4572001-11-28 20:27:42 +00003207 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003208 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003209}
3210
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003211static int
Fred Drakeba096332000-07-09 07:04:36 +00003212formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003213{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003214 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003215 if (PyString_Check(v)) {
3216 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003217 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003218 }
3219 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003220 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003221 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003222 }
3223 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003224 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003225}
3226
Guido van Rossum013142a1994-08-30 08:19:36 +00003227
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003228/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3229
3230 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3231 chars are formatted. XXX This is a magic number. Each formatting
3232 routine does bounds checking to ensure no overflow, but a better
3233 solution may be to malloc a buffer of appropriate size for each
3234 format. For now, the current solution is sufficient.
3235*/
3236#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003237
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003238PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003239PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003240{
3241 char *fmt, *res;
3242 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003243 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003244 PyObject *result, *orig_args;
3245#ifdef Py_USING_UNICODE
3246 PyObject *v, *w;
3247#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003248 PyObject *dict = NULL;
3249 if (format == NULL || !PyString_Check(format) || args == NULL) {
3250 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003251 return NULL;
3252 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003253 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003254 fmt = PyString_AS_STRING(format);
3255 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003256 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003257 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003258 if (result == NULL)
3259 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003260 res = PyString_AsString(result);
3261 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003262 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003263 argidx = 0;
3264 }
3265 else {
3266 arglen = -1;
3267 argidx = -2;
3268 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003269 if (args->ob_type->tp_as_mapping)
3270 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003271 while (--fmtcnt >= 0) {
3272 if (*fmt != '%') {
3273 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003274 rescnt = fmtcnt + 100;
3275 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003276 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003277 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003278 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003279 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003280 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003281 }
3282 *res++ = *fmt++;
3283 }
3284 else {
3285 /* Got a format specifier */
3286 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003287 int width = -1;
3288 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003289 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003290 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003291 PyObject *v = NULL;
3292 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003293 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003294 int sign;
3295 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003296 char formatbuf[FORMATBUFLEN];
3297 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003298#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003299 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003300 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003301#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003302
Guido van Rossumda9c2711996-12-05 21:58:58 +00003303 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003304 if (*fmt == '(') {
3305 char *keystart;
3306 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003307 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003308 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003309
3310 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003311 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003312 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003313 goto error;
3314 }
3315 ++fmt;
3316 --fmtcnt;
3317 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003318 /* Skip over balanced parentheses */
3319 while (pcount > 0 && --fmtcnt >= 0) {
3320 if (*fmt == ')')
3321 --pcount;
3322 else if (*fmt == '(')
3323 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003324 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003325 }
3326 keylen = fmt - keystart - 1;
3327 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003328 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003329 "incomplete format key");
3330 goto error;
3331 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003332 key = PyString_FromStringAndSize(keystart,
3333 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003334 if (key == NULL)
3335 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003336 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003337 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003338 args_owned = 0;
3339 }
3340 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003341 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003342 if (args == NULL) {
3343 goto error;
3344 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003345 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003346 arglen = -1;
3347 argidx = -2;
3348 }
Guido van Rossume5372401993-03-16 12:15:04 +00003349 while (--fmtcnt >= 0) {
3350 switch (c = *fmt++) {
3351 case '-': flags |= F_LJUST; continue;
3352 case '+': flags |= F_SIGN; continue;
3353 case ' ': flags |= F_BLANK; continue;
3354 case '#': flags |= F_ALT; continue;
3355 case '0': flags |= F_ZERO; continue;
3356 }
3357 break;
3358 }
3359 if (c == '*') {
3360 v = getnextarg(args, arglen, &argidx);
3361 if (v == NULL)
3362 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003363 if (!PyInt_Check(v)) {
3364 PyErr_SetString(PyExc_TypeError,
3365 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003366 goto error;
3367 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003368 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003369 if (width < 0) {
3370 flags |= F_LJUST;
3371 width = -width;
3372 }
Guido van Rossume5372401993-03-16 12:15:04 +00003373 if (--fmtcnt >= 0)
3374 c = *fmt++;
3375 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003376 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003377 width = c - '0';
3378 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003379 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003380 if (!isdigit(c))
3381 break;
3382 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003383 PyErr_SetString(
3384 PyExc_ValueError,
3385 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003386 goto error;
3387 }
3388 width = width*10 + (c - '0');
3389 }
3390 }
3391 if (c == '.') {
3392 prec = 0;
3393 if (--fmtcnt >= 0)
3394 c = *fmt++;
3395 if (c == '*') {
3396 v = getnextarg(args, arglen, &argidx);
3397 if (v == NULL)
3398 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003399 if (!PyInt_Check(v)) {
3400 PyErr_SetString(
3401 PyExc_TypeError,
3402 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003403 goto error;
3404 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003405 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003406 if (prec < 0)
3407 prec = 0;
3408 if (--fmtcnt >= 0)
3409 c = *fmt++;
3410 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003411 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003412 prec = c - '0';
3413 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003414 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003415 if (!isdigit(c))
3416 break;
3417 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003418 PyErr_SetString(
3419 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003420 "prec too big");
3421 goto error;
3422 }
3423 prec = prec*10 + (c - '0');
3424 }
3425 }
3426 } /* prec */
3427 if (fmtcnt >= 0) {
3428 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003429 if (--fmtcnt >= 0)
3430 c = *fmt++;
3431 }
3432 }
3433 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003434 PyErr_SetString(PyExc_ValueError,
3435 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003436 goto error;
3437 }
3438 if (c != '%') {
3439 v = getnextarg(args, arglen, &argidx);
3440 if (v == NULL)
3441 goto error;
3442 }
3443 sign = 0;
3444 fill = ' ';
3445 switch (c) {
3446 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003447 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003448 len = 1;
3449 break;
3450 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003451 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003452#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003453 if (PyUnicode_Check(v)) {
3454 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003455 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003456 goto unicode;
3457 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003458#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003459 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003460 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003461 else
3462 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003463 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003464 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003465 if (!PyString_Check(temp)) {
3466 PyErr_SetString(PyExc_TypeError,
3467 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003468 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003469 goto error;
3470 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003471 pbuf = PyString_AS_STRING(temp);
3472 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003473 if (prec >= 0 && len > prec)
3474 len = prec;
3475 break;
3476 case 'i':
3477 case 'd':
3478 case 'u':
3479 case 'o':
3480 case 'x':
3481 case 'X':
3482 if (c == 'i')
3483 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003484 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003485 temp = _PyString_FormatLong(v, flags,
3486 prec, c, &pbuf, &len);
3487 if (!temp)
3488 goto error;
3489 /* unbounded ints can always produce
3490 a sign character! */
3491 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003492 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003493 else {
3494 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003495 len = formatint(pbuf,
3496 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003497 flags, prec, c, v);
3498 if (len < 0)
3499 goto error;
3500 /* only d conversion is signed */
3501 sign = c == 'd';
3502 }
3503 if (flags & F_ZERO)
3504 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003505 break;
3506 case 'e':
3507 case 'E':
3508 case 'f':
3509 case 'g':
3510 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003511 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003512 len = formatfloat(pbuf, sizeof(formatbuf),
3513 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003514 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003515 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003516 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003517 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003518 fill = '0';
3519 break;
3520 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003521 pbuf = formatbuf;
3522 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003523 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003524 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003525 break;
3526 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003527 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003528 "unsupported format character '%c' (0x%x) "
3529 "at index %i",
3530 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003531 goto error;
3532 }
3533 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003534 if (*pbuf == '-' || *pbuf == '+') {
3535 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003536 len--;
3537 }
3538 else if (flags & F_SIGN)
3539 sign = '+';
3540 else if (flags & F_BLANK)
3541 sign = ' ';
3542 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003543 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003544 }
3545 if (width < len)
3546 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003547 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003548 reslen -= rescnt;
3549 rescnt = width + fmtcnt + 100;
3550 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003551 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003552 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003553 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003554 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003555 }
3556 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003557 if (fill != ' ')
3558 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003559 rescnt--;
3560 if (width > len)
3561 width--;
3562 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003563 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3564 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003565 assert(pbuf[1] == c);
3566 if (fill != ' ') {
3567 *res++ = *pbuf++;
3568 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003569 }
Tim Petersfff53252001-04-12 18:38:48 +00003570 rescnt -= 2;
3571 width -= 2;
3572 if (width < 0)
3573 width = 0;
3574 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003575 }
3576 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003577 do {
3578 --rescnt;
3579 *res++ = fill;
3580 } while (--width > len);
3581 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003582 if (fill == ' ') {
3583 if (sign)
3584 *res++ = sign;
3585 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003586 (c == 'x' || c == 'X')) {
3587 assert(pbuf[0] == '0');
3588 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003589 *res++ = *pbuf++;
3590 *res++ = *pbuf++;
3591 }
3592 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003593 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003594 res += len;
3595 rescnt -= len;
3596 while (--width >= len) {
3597 --rescnt;
3598 *res++ = ' ';
3599 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003600 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003601 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003602 "not all arguments converted");
3603 goto error;
3604 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003605 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003606 } /* '%' */
3607 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003608 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003609 PyErr_SetString(PyExc_TypeError,
3610 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003611 goto error;
3612 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003613 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003614 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003615 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003616 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003617 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003618
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003619#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003620 unicode:
3621 if (args_owned) {
3622 Py_DECREF(args);
3623 args_owned = 0;
3624 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003625 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003626 if (PyTuple_Check(orig_args) && argidx > 0) {
3627 PyObject *v;
3628 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3629 v = PyTuple_New(n);
3630 if (v == NULL)
3631 goto error;
3632 while (--n >= 0) {
3633 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3634 Py_INCREF(w);
3635 PyTuple_SET_ITEM(v, n, w);
3636 }
3637 args = v;
3638 } else {
3639 Py_INCREF(orig_args);
3640 args = orig_args;
3641 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003642 args_owned = 1;
3643 /* Take what we have of the result and let the Unicode formatting
3644 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003645 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003646 if (_PyString_Resize(&result, rescnt))
3647 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003648 fmtcnt = PyString_GET_SIZE(format) - \
3649 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003650 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3651 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003652 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003653 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003654 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003655 if (v == NULL)
3656 goto error;
3657 /* Paste what we have (result) to what the Unicode formatting
3658 function returned (v) and return the result (or error) */
3659 w = PyUnicode_Concat(result, v);
3660 Py_DECREF(result);
3661 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003662 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003663 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003664#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003665
Guido van Rossume5372401993-03-16 12:15:04 +00003666 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003667 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003668 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003669 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003670 }
Guido van Rossume5372401993-03-16 12:15:04 +00003671 return NULL;
3672}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003673
3674
Guido van Rossum2a61e741997-01-18 07:55:05 +00003675
Barry Warsaw4df762f2000-08-16 23:41:01 +00003676/* This dictionary will leak at PyString_Fini() time. That's acceptable
3677 * because PyString_Fini() specifically frees interned strings that are
3678 * only referenced by this dictionary. The CVS log entry for revision 2.45
3679 * says:
3680 *
3681 * Change the Fini function to only remove otherwise unreferenced
3682 * strings from the interned table. There are references in
3683 * hard-to-find static variables all over the interpreter, and it's not
3684 * worth trying to get rid of all those; but "uninterning" isn't fair
3685 * either and may cause subtle failures later -- so we have to keep them
3686 * in the interned table.
3687 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003688static PyObject *interned;
3689
3690void
Fred Drakeba096332000-07-09 07:04:36 +00003691PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003692{
3693 register PyStringObject *s = (PyStringObject *)(*p);
3694 PyObject *t;
3695 if (s == NULL || !PyString_Check(s))
3696 Py_FatalError("PyString_InternInPlace: strings only please!");
3697 if ((t = s->ob_sinterned) != NULL) {
3698 if (t == (PyObject *)s)
3699 return;
3700 Py_INCREF(t);
3701 *p = t;
3702 Py_DECREF(s);
3703 return;
3704 }
3705 if (interned == NULL) {
3706 interned = PyDict_New();
3707 if (interned == NULL)
3708 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003709 }
3710 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3711 Py_INCREF(t);
3712 *p = s->ob_sinterned = t;
3713 Py_DECREF(s);
3714 return;
3715 }
Tim Peters111f6092001-09-12 07:54:51 +00003716 /* Ensure that only true string objects appear in the intern dict,
3717 and as the value of ob_sinterned. */
3718 if (PyString_CheckExact(s)) {
3719 t = (PyObject *)s;
3720 if (PyDict_SetItem(interned, t, t) == 0) {
3721 s->ob_sinterned = t;
3722 return;
3723 }
3724 }
3725 else {
3726 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3727 PyString_GET_SIZE(s));
3728 if (t != NULL) {
3729 if (PyDict_SetItem(interned, t, t) == 0) {
3730 *p = s->ob_sinterned = t;
3731 Py_DECREF(s);
3732 return;
3733 }
3734 Py_DECREF(t);
3735 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003736 }
3737 PyErr_Clear();
3738}
3739
3740
3741PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003742PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003743{
3744 PyObject *s = PyString_FromString(cp);
3745 if (s == NULL)
3746 return NULL;
3747 PyString_InternInPlace(&s);
3748 return s;
3749}
3750
Guido van Rossum8cf04761997-08-02 02:57:45 +00003751void
Fred Drakeba096332000-07-09 07:04:36 +00003752PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003753{
3754 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003755 for (i = 0; i < UCHAR_MAX + 1; i++) {
3756 Py_XDECREF(characters[i]);
3757 characters[i] = NULL;
3758 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003759 Py_XDECREF(nullstring);
3760 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003761 if (interned) {
3762 int pos, changed;
3763 PyObject *key, *value;
3764 do {
3765 changed = 0;
3766 pos = 0;
3767 while (PyDict_Next(interned, &pos, &key, &value)) {
3768 if (key->ob_refcnt == 2 && key == value) {
3769 PyDict_DelItem(interned, key);
3770 changed = 1;
3771 }
3772 }
3773 } while (changed);
3774 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003775}
Barry Warsawa903ad982001-02-23 16:40:48 +00003776
Barry Warsawa903ad982001-02-23 16:40:48 +00003777void _Py_ReleaseInternedStrings(void)
3778{
3779 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003780 fprintf(stderr, "releasing interned strings\n");
3781 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003782 Py_DECREF(interned);
3783 interned = NULL;
3784 }
3785}