blob: d3c9e4bdf75dcb049b840ea840ce8f4f1f061d9b [file] [log] [blame]
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00001/* String object implementation */
2
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00004
Guido van Rossum013142a1994-08-30 08:19:36 +00005#include <ctype.h>
6
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00007#ifdef COUNT_ALLOCS
8int null_strings, one_strings;
9#endif
10
Fred Draked5fadf72000-09-26 05:46:01 +000011#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000012#define UCHAR_MAX 255
13#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000014
Guido van Rossumc0b618a1997-05-02 03:12:38 +000015static PyStringObject *characters[UCHAR_MAX + 1];
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *nullstring;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000017
/*
   For both PyString_FromString() and PyString_FromStringAndSize(), the
   parameter `size' denotes number of characters to allocate, not counting any
   null terminating character.

   For PyString_FromString(), the parameter `str' points to a null-terminated
   string containing exactly `size' bytes.

   For PyString_FromStringAndSize(), the parameter `str' is
   either NULL or else points to a string containing at least `size' bytes.
   For PyString_FromStringAndSize(), the string in the `str' parameter does
   not have to be null-terminated.  (Therefore it is safe to construct a
   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
   bytes (setting the last byte to the null terminating character) and you can
   fill in the data yourself.  If `str' is non-NULL then the resulting
   PyString object must be treated as immutable and you must not fill in nor
   alter the data yourself, since the strings may be shared.

   The PyObject member `op->ob_size', which denotes the number of "extra
   items" in a variable-size object, will contain the number of bytes
   allocated for string data, not counting the null terminating character.  It
   is therefore equal to the `size' parameter (for
   PyString_FromStringAndSize()) or the length of the string in the `str'
   parameter (for PyString_FromString()).
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000044PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000045PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000046{
Tim Peters9e897f42001-05-09 07:37:07 +000047 register PyStringObject *op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000048 if (size == 0 && (op = nullstring) != NULL) {
49#ifdef COUNT_ALLOCS
50 null_strings++;
51#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000052 Py_INCREF(op);
53 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000054 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000055 if (size == 1 && str != NULL &&
56 (op = characters[*str & UCHAR_MAX]) != NULL)
57 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000058#ifdef COUNT_ALLOCS
59 one_strings++;
60#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000061 Py_INCREF(op);
62 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000063 }
Guido van Rossumb18618d2000-05-03 23:44:39 +000064
65 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000066 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +000067 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000068 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000069 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000070 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000071 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +000072 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +000073 if (str != NULL)
74 memcpy(op->ob_sval, str, size);
75 op->ob_sval[size] = '\0';
Tim Peters8deda702002-03-30 10:06:07 +000076 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000077 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000078 PyObject *t = (PyObject *)op;
79 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000080 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000082 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000083 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000084 PyObject *t = (PyObject *)op;
85 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000086 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000088 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000089 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000091}
92
Guido van Rossumc0b618a1997-05-02 03:12:38 +000093PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000094PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000095{
Tim Peters62de65b2001-12-06 20:29:32 +000096 register size_t size;
Tim Peters9e897f42001-05-09 07:37:07 +000097 register PyStringObject *op;
Tim Peters62de65b2001-12-06 20:29:32 +000098
99 assert(str != NULL);
100 size = strlen(str);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000101 if (size > INT_MAX) {
102 PyErr_SetString(PyExc_OverflowError,
103 "string is too long for a Python string");
104 return NULL;
105 }
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000106 if (size == 0 && (op = nullstring) != NULL) {
107#ifdef COUNT_ALLOCS
108 null_strings++;
109#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000110 Py_INCREF(op);
111 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000112 }
113 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
114#ifdef COUNT_ALLOCS
115 one_strings++;
116#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 Py_INCREF(op);
118 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119 }
Guido van Rossumb18618d2000-05-03 23:44:39 +0000120
121 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000122 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000123 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000124 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000125 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000126 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128 op->ob_sinterned = NULL;
Guido van Rossum169192e2001-12-10 15:45:54 +0000129 memcpy(op->ob_sval, str, size+1);
Tim Peters8deda702002-03-30 10:06:07 +0000130 /* share short strings */
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000131 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000132 PyObject *t = (PyObject *)op;
133 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000134 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000135 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000136 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000138 PyObject *t = (PyObject *)op;
139 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000140 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000141 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000142 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000145}
146
Barry Warsawdadace02001-08-24 18:32:06 +0000147PyObject *
148PyString_FromFormatV(const char *format, va_list vargs)
149{
Tim Petersc15c4f12001-10-02 21:32:07 +0000150 va_list count;
Barry Warsawdadace02001-08-24 18:32:06 +0000151 int n = 0;
152 const char* f;
153 char *s;
154 PyObject* string;
155
Tim Petersc15c4f12001-10-02 21:32:07 +0000156#ifdef VA_LIST_IS_ARRAY
157 memcpy(count, vargs, sizeof(va_list));
158#else
159 count = vargs;
160#endif
Barry Warsawdadace02001-08-24 18:32:06 +0000161 /* step 1: figure out how large a buffer we need */
162 for (f = format; *f; f++) {
163 if (*f == '%') {
164 const char* p = f;
165 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
166 ;
167
168 /* skip the 'l' in %ld, since it doesn't change the
169 width. although only %d is supported (see
170 "expand" section below), others can be easily
Tim Peters9161c8b2001-12-03 01:55:38 +0000171 added */
Barry Warsawdadace02001-08-24 18:32:06 +0000172 if (*f == 'l' && *(f+1) == 'd')
173 ++f;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000174
Barry Warsawdadace02001-08-24 18:32:06 +0000175 switch (*f) {
176 case 'c':
177 (void)va_arg(count, int);
178 /* fall through... */
179 case '%':
180 n++;
181 break;
182 case 'd': case 'i': case 'x':
183 (void) va_arg(count, int);
Tim Peters9161c8b2001-12-03 01:55:38 +0000184 /* 20 bytes is enough to hold a 64-bit
185 integer. Decimal takes the most space.
186 This isn't enough for octal. */
Barry Warsawdadace02001-08-24 18:32:06 +0000187 n += 20;
188 break;
189 case 's':
190 s = va_arg(count, char*);
191 n += strlen(s);
192 break;
193 case 'p':
194 (void) va_arg(count, int);
195 /* maximum 64-bit pointer representation:
196 * 0xffffffffffffffff
197 * so 19 characters is enough.
Tim Peters9161c8b2001-12-03 01:55:38 +0000198 * XXX I count 18 -- what's the extra for?
Barry Warsawdadace02001-08-24 18:32:06 +0000199 */
200 n += 19;
201 break;
202 default:
203 /* if we stumble upon an unknown
204 formatting code, copy the rest of
205 the format string to the output
206 string. (we cannot just skip the
207 code, since there's no way to know
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000208 what's in the argument list) */
Barry Warsawdadace02001-08-24 18:32:06 +0000209 n += strlen(p);
210 goto expand;
211 }
212 } else
213 n++;
214 }
215 expand:
216 /* step 2: fill the buffer */
Tim Peters9161c8b2001-12-03 01:55:38 +0000217 /* Since we've analyzed how much space we need for the worst case,
218 use sprintf directly instead of the slower PyOS_snprintf. */
Barry Warsawdadace02001-08-24 18:32:06 +0000219 string = PyString_FromStringAndSize(NULL, n);
220 if (!string)
221 return NULL;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000222
Barry Warsawdadace02001-08-24 18:32:06 +0000223 s = PyString_AsString(string);
224
225 for (f = format; *f; f++) {
226 if (*f == '%') {
227 const char* p = f++;
228 int i, longflag = 0;
229 /* parse the width.precision part (we're only
230 interested in the precision value, if any) */
231 n = 0;
232 while (isdigit(Py_CHARMASK(*f)))
233 n = (n*10) + *f++ - '0';
234 if (*f == '.') {
235 f++;
236 n = 0;
237 while (isdigit(Py_CHARMASK(*f)))
238 n = (n*10) + *f++ - '0';
239 }
240 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
241 f++;
242 /* handle the long flag, but only for %ld. others
243 can be added when necessary. */
244 if (*f == 'l' && *(f+1) == 'd') {
245 longflag = 1;
246 ++f;
247 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000248
Barry Warsawdadace02001-08-24 18:32:06 +0000249 switch (*f) {
250 case 'c':
251 *s++ = va_arg(vargs, int);
252 break;
253 case 'd':
254 if (longflag)
255 sprintf(s, "%ld", va_arg(vargs, long));
256 else
257 sprintf(s, "%d", va_arg(vargs, int));
258 s += strlen(s);
259 break;
260 case 'i':
261 sprintf(s, "%i", va_arg(vargs, int));
262 s += strlen(s);
263 break;
264 case 'x':
265 sprintf(s, "%x", va_arg(vargs, int));
266 s += strlen(s);
267 break;
268 case 's':
269 p = va_arg(vargs, char*);
270 i = strlen(p);
271 if (n > 0 && i > n)
272 i = n;
273 memcpy(s, p, i);
274 s += i;
275 break;
276 case 'p':
277 sprintf(s, "%p", va_arg(vargs, void*));
Tim Peters6af5bbb2001-08-25 03:02:28 +0000278 /* %p is ill-defined: ensure leading 0x. */
279 if (s[1] == 'X')
280 s[1] = 'x';
281 else if (s[1] != 'x') {
282 memmove(s+2, s, strlen(s)+1);
283 s[0] = '0';
284 s[1] = 'x';
285 }
Barry Warsawdadace02001-08-24 18:32:06 +0000286 s += strlen(s);
287 break;
288 case '%':
289 *s++ = '%';
290 break;
291 default:
292 strcpy(s, p);
293 s += strlen(s);
294 goto end;
295 }
296 } else
297 *s++ = *f;
298 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000299
Barry Warsawdadace02001-08-24 18:32:06 +0000300 end:
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000301 _PyString_Resize(&string, s - PyString_AS_STRING(string));
Barry Warsawdadace02001-08-24 18:32:06 +0000302 return string;
303}
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000304
Barry Warsawdadace02001-08-24 18:32:06 +0000305PyObject *
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000306PyString_FromFormat(const char *format, ...)
Barry Warsawdadace02001-08-24 18:32:06 +0000307{
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000308 PyObject* ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000309 va_list vargs;
310
311#ifdef HAVE_STDARG_PROTOTYPES
312 va_start(vargs, format);
313#else
314 va_start(vargs);
315#endif
Barry Warsaw7c47beb2001-08-27 03:11:09 +0000316 ret = PyString_FromFormatV(format, vargs);
317 va_end(vargs);
318 return ret;
Barry Warsawdadace02001-08-24 18:32:06 +0000319}
320
321
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000322PyObject *PyString_Decode(const char *s,
323 int size,
324 const char *encoding,
325 const char *errors)
326{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000327 PyObject *v, *str;
328
329 str = PyString_FromStringAndSize(s, size);
330 if (str == NULL)
331 return NULL;
332 v = PyString_AsDecodedString(str, encoding, errors);
333 Py_DECREF(str);
334 return v;
335}
336
337PyObject *PyString_AsDecodedObject(PyObject *str,
338 const char *encoding,
339 const char *errors)
340{
341 PyObject *v;
342
343 if (!PyString_Check(str)) {
344 PyErr_BadArgument();
345 goto onError;
346 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000347
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000348 if (encoding == NULL) {
349#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000350 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000351#else
352 PyErr_SetString(PyExc_ValueError, "no encoding specified");
353 goto onError;
354#endif
355 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000356
357 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000358 v = PyCodec_Decode(str, encoding, errors);
359 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000360 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000361
362 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000363
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000364 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000365 return NULL;
366}
367
368PyObject *PyString_AsDecodedString(PyObject *str,
369 const char *encoding,
370 const char *errors)
371{
372 PyObject *v;
373
374 v = PyString_AsDecodedObject(str, encoding, errors);
375 if (v == NULL)
376 goto onError;
377
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000378#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000379 /* Convert Unicode to a string using the default encoding */
380 if (PyUnicode_Check(v)) {
381 PyObject *temp = v;
382 v = PyUnicode_AsEncodedString(v, NULL, NULL);
383 Py_DECREF(temp);
384 if (v == NULL)
385 goto onError;
386 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000387#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000388 if (!PyString_Check(v)) {
389 PyErr_Format(PyExc_TypeError,
390 "decoder did not return a string object (type=%.400s)",
391 v->ob_type->tp_name);
392 Py_DECREF(v);
393 goto onError;
394 }
395
396 return v;
397
398 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000399 return NULL;
400}
401
402PyObject *PyString_Encode(const char *s,
403 int size,
404 const char *encoding,
405 const char *errors)
406{
407 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000408
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000409 str = PyString_FromStringAndSize(s, size);
410 if (str == NULL)
411 return NULL;
412 v = PyString_AsEncodedString(str, encoding, errors);
413 Py_DECREF(str);
414 return v;
415}
416
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000417PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000418 const char *encoding,
419 const char *errors)
420{
421 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000422
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000423 if (!PyString_Check(str)) {
424 PyErr_BadArgument();
425 goto onError;
426 }
427
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000428 if (encoding == NULL) {
429#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000430 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000431#else
432 PyErr_SetString(PyExc_ValueError, "no encoding specified");
433 goto onError;
434#endif
435 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000436
437 /* Encode via the codec registry */
438 v = PyCodec_Encode(str, encoding, errors);
439 if (v == NULL)
440 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000441
442 return v;
443
444 onError:
445 return NULL;
446}
447
448PyObject *PyString_AsEncodedString(PyObject *str,
449 const char *encoding,
450 const char *errors)
451{
452 PyObject *v;
453
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000454 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000455 if (v == NULL)
456 goto onError;
457
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000458#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000459 /* Convert Unicode to a string using the default encoding */
460 if (PyUnicode_Check(v)) {
461 PyObject *temp = v;
462 v = PyUnicode_AsEncodedString(v, NULL, NULL);
463 Py_DECREF(temp);
464 if (v == NULL)
465 goto onError;
466 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000467#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000468 if (!PyString_Check(v)) {
469 PyErr_Format(PyExc_TypeError,
470 "encoder did not return a string object (type=%.400s)",
471 v->ob_type->tp_name);
472 Py_DECREF(v);
473 goto onError;
474 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000475
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000476 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000477
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000478 onError:
479 return NULL;
480}
481
Guido van Rossum234f9421993-06-17 12:35:49 +0000482static void
Fred Drakeba096332000-07-09 07:04:36 +0000483string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000484{
Guido van Rossum9475a232001-10-05 20:51:39 +0000485 op->ob_type->tp_free(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000486}
487
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000488static int
489string_getsize(register PyObject *op)
490{
491 char *s;
492 int len;
493 if (PyString_AsStringAndSize(op, &s, &len))
494 return -1;
495 return len;
496}
497
498static /*const*/ char *
499string_getbuffer(register PyObject *op)
500{
501 char *s;
502 int len;
503 if (PyString_AsStringAndSize(op, &s, &len))
504 return NULL;
505 return s;
506}
507
Guido van Rossumd7047b31995-01-02 19:07:15 +0000508int
Fred Drakeba096332000-07-09 07:04:36 +0000509PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000510{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000511 if (!PyString_Check(op))
512 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514}
515
516/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000517PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000518{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000519 if (!PyString_Check(op))
520 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000521 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000522}
523
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000524int
525PyString_AsStringAndSize(register PyObject *obj,
526 register char **s,
527 register int *len)
528{
529 if (s == NULL) {
530 PyErr_BadInternalCall();
531 return -1;
532 }
533
534 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000535#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000536 if (PyUnicode_Check(obj)) {
537 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
538 if (obj == NULL)
539 return -1;
540 }
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000541 else
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000542#endif
543 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000544 PyErr_Format(PyExc_TypeError,
545 "expected string or Unicode object, "
546 "%.200s found", obj->ob_type->tp_name);
547 return -1;
548 }
549 }
550
551 *s = PyString_AS_STRING(obj);
552 if (len != NULL)
553 *len = PyString_GET_SIZE(obj);
554 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
555 PyErr_SetString(PyExc_TypeError,
556 "expected string without null bytes");
557 return -1;
558 }
559 return 0;
560}
561
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000562/* Methods */
563
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000564static int
Fred Drakeba096332000-07-09 07:04:36 +0000565string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566{
567 int i;
568 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000569 int quote;
Tim Petersc9933152001-10-16 20:18:24 +0000570
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000571 /* XXX Ought to check for interrupts when writing long strings */
Tim Petersc9933152001-10-16 20:18:24 +0000572 if (! PyString_CheckExact(op)) {
573 int ret;
574 /* A str subclass may have its own __str__ method. */
575 op = (PyStringObject *) PyObject_Str((PyObject *)op);
576 if (op == NULL)
577 return -1;
578 ret = string_print(op, fp, flags);
579 Py_DECREF(op);
580 return ret;
581 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000582 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000584 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000586
Thomas Wouters7e474022000-07-16 12:04:32 +0000587 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000588 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000589 if (memchr(op->ob_sval, '\'', op->ob_size) &&
590 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000591 quote = '"';
592
593 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000594 for (i = 0; i < op->ob_size; i++) {
595 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000596 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000598 else if (c == '\t')
599 fprintf(fp, "\\t");
600 else if (c == '\n')
601 fprintf(fp, "\\n");
602 else if (c == '\r')
603 fprintf(fp, "\\r");
604 else if (c < ' ' || c >= 0x7f)
605 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000606 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000607 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000609 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000610 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000611}
612
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000613static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000614string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000615{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000616 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
617 PyObject *v;
618 if (newsize > INT_MAX) {
619 PyErr_SetString(PyExc_OverflowError,
620 "string is too large to make repr");
621 }
622 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000623 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000624 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000625 }
626 else {
627 register int i;
628 register char c;
629 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000630 int quote;
631
Thomas Wouters7e474022000-07-16 12:04:32 +0000632 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000633 quote = '\'';
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000634 if (memchr(op->ob_sval, '\'', op->ob_size) &&
635 !memchr(op->ob_sval, '"', op->ob_size))
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000636 quote = '"';
637
Tim Peters9161c8b2001-12-03 01:55:38 +0000638 p = PyString_AS_STRING(v);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000639 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000640 for (i = 0; i < op->ob_size; i++) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000641 /* There's at least enough room for a hex escape
642 and a closing quote. */
643 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000644 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000645 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000646 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000647 else if (c == '\t')
648 *p++ = '\\', *p++ = 't';
649 else if (c == '\n')
650 *p++ = '\\', *p++ = 'n';
651 else if (c == '\r')
652 *p++ = '\\', *p++ = 'r';
653 else if (c < ' ' || c >= 0x7f) {
Tim Peters9161c8b2001-12-03 01:55:38 +0000654 /* For performance, we don't want to call
655 PyOS_snprintf here (extra layers of
656 function call). */
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000657 sprintf(p, "\\x%02x", c & 0xff);
658 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000659 }
660 else
661 *p++ = c;
662 }
Tim Peters9161c8b2001-12-03 01:55:38 +0000663 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000664 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000665 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000666 _PyString_Resize(
Tim Peters9161c8b2001-12-03 01:55:38 +0000667 &v, (int) (p - PyString_AS_STRING(v)));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000668 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000669 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670}
671
Guido van Rossum189f1df2001-05-01 16:51:53 +0000672static PyObject *
673string_str(PyObject *s)
674{
Tim Petersc9933152001-10-16 20:18:24 +0000675 assert(PyString_Check(s));
676 if (PyString_CheckExact(s)) {
677 Py_INCREF(s);
678 return s;
679 }
680 else {
681 /* Subtype -- return genuine string with the same value. */
682 PyStringObject *t = (PyStringObject *) s;
683 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
684 }
Guido van Rossum189f1df2001-05-01 16:51:53 +0000685}
686
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000687static int
Fred Drakeba096332000-07-09 07:04:36 +0000688string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000689{
690 return a->ob_size;
691}
692
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000693static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000694string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000695{
696 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000697 register PyStringObject *op;
698 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000699#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 if (PyUnicode_Check(bb))
701 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000702#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000703 PyErr_Format(PyExc_TypeError,
Guido van Rossum5c66a262001-10-22 04:12:44 +0000704 "cannot concatenate 'str' and '%.200s' objects",
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000705 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706 return NULL;
707 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000708#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000709 /* Optimize cases with empty left or right operand */
Tim Peters8fa5dd02001-09-12 02:18:30 +0000710 if ((a->ob_size == 0 || b->ob_size == 0) &&
711 PyString_CheckExact(a) && PyString_CheckExact(b)) {
712 if (a->ob_size == 0) {
713 Py_INCREF(bb);
714 return bb;
715 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716 Py_INCREF(a);
717 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000718 }
719 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000720 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000722 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000723 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000724 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000725 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000726 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000727 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000728 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
729 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
730 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000731 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000732#undef b
733}
734
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000735static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000736string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000737{
738 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000739 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000740 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000741 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000742 if (n < 0)
743 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000744 /* watch out for overflows: the size can overflow int,
745 * and the # of bytes needed can overflow size_t
746 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000747 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000748 if (n && size / n != a->ob_size) {
749 PyErr_SetString(PyExc_OverflowError,
750 "repeated string is too long");
751 return NULL;
752 }
Tim Peters8fa5dd02001-09-12 02:18:30 +0000753 if (size == a->ob_size && PyString_CheckExact(a)) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000754 Py_INCREF(a);
755 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000756 }
Tim Peters8f422462000-09-09 06:13:41 +0000757 nbytes = size * sizeof(char);
758 if (nbytes / sizeof(char) != (size_t)size ||
759 nbytes + sizeof(PyStringObject) <= nbytes) {
760 PyErr_SetString(PyExc_OverflowError,
761 "repeated string is too long");
762 return NULL;
763 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000764 op = (PyStringObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +0000765 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000766 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000767 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000768 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000769 op->ob_shash = -1;
Guido van Rossum2a61e741997-01-18 07:55:05 +0000770 op->ob_sinterned = NULL;
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000771 for (i = 0; i < size; i += a->ob_size)
772 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
773 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000774 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000775}
776
777/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
778
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000779static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000780string_slice(register PyStringObject *a, register int i, register int j)
781 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782{
783 if (i < 0)
784 i = 0;
785 if (j < 0)
786 j = 0; /* Avoid signed/unsigned bug in next line */
787 if (j > a->ob_size)
788 j = a->ob_size;
Tim Peters8fa5dd02001-09-12 02:18:30 +0000789 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
790 /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000791 Py_INCREF(a);
792 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000793 }
794 if (j < i)
795 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000796 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000797}
798
Guido van Rossum9284a572000-03-07 15:53:43 +0000799static int
Fred Drakeba096332000-07-09 07:04:36 +0000800string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000801{
802 register char *s, *end;
803 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000804#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000805 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000806 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000807#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000808 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000809 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000810 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000811 return -1;
812 }
813 c = PyString_AsString(el)[0];
814 s = PyString_AsString(a);
815 end = s + PyString_Size(a);
816 while (s < end) {
817 if (c == *s++)
818 return 1;
819 }
820 return 0;
821}
822
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000823static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000824string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000825{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000826 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000827 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000828 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000829 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000830 return NULL;
831 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000832 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000833 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000834 if (v == NULL)
835 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000836 else {
837#ifdef COUNT_ALLOCS
838 one_strings++;
839#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000840 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000841 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000842 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000843}
844
Martin v. Löwiscd353062001-05-24 16:56:35 +0000845static PyObject*
846string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000847{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000848 int c;
849 int len_a, len_b;
850 int min_len;
851 PyObject *result;
852
Guido van Rossum2ed6bf82001-09-27 20:30:07 +0000853 /* Make sure both arguments are strings. */
854 if (!(PyString_Check(a) && PyString_Check(b))) {
Martin v. Löwiscd353062001-05-24 16:56:35 +0000855 result = Py_NotImplemented;
856 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000857 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000858 if (a == b) {
859 switch (op) {
860 case Py_EQ:case Py_LE:case Py_GE:
861 result = Py_True;
862 goto out;
863 case Py_NE:case Py_LT:case Py_GT:
864 result = Py_False;
865 goto out;
866 }
867 }
868 if (op == Py_EQ) {
869 /* Supporting Py_NE here as well does not save
870 much time, since Py_NE is rarely used. */
871 if (a->ob_size == b->ob_size
872 && (a->ob_sval[0] == b->ob_sval[0]
Guido van Rossum3aa3fc42002-04-15 13:48:52 +0000873 && memcmp(a->ob_sval, b->ob_sval,
Martin v. Löwiscd353062001-05-24 16:56:35 +0000874 a->ob_size) == 0)) {
875 result = Py_True;
876 } else {
877 result = Py_False;
878 }
879 goto out;
880 }
881 len_a = a->ob_size; len_b = b->ob_size;
882 min_len = (len_a < len_b) ? len_a : len_b;
883 if (min_len > 0) {
884 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
885 if (c==0)
886 c = memcmp(a->ob_sval, b->ob_sval, min_len);
887 }else
888 c = 0;
889 if (c == 0)
890 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
891 switch (op) {
892 case Py_LT: c = c < 0; break;
893 case Py_LE: c = c <= 0; break;
894 case Py_EQ: assert(0); break; /* unreachable */
895 case Py_NE: c = c != 0; break;
896 case Py_GT: c = c > 0; break;
897 case Py_GE: c = c >= 0; break;
898 default:
899 result = Py_NotImplemented;
900 goto out;
901 }
902 result = c ? Py_True : Py_False;
903 out:
904 Py_INCREF(result);
905 return result;
906}
907
908int
909_PyString_Eq(PyObject *o1, PyObject *o2)
910{
911 PyStringObject *a, *b;
912 a = (PyStringObject*)o1;
913 b = (PyStringObject*)o2;
914 return a->ob_size == b->ob_size
915 && *a->ob_sval == *b->ob_sval
916 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000917}
918
Guido van Rossum9bfef441993-03-29 10:43:31 +0000919static long
Fred Drakeba096332000-07-09 07:04:36 +0000920string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000921{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000922 register int len;
923 register unsigned char *p;
924 register long x;
925
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000926 if (a->ob_shash != -1)
927 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000928 if (a->ob_sinterned != NULL)
929 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000930 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000931 len = a->ob_size;
932 p = (unsigned char *) a->ob_sval;
933 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000934 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000935 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000936 x ^= a->ob_size;
937 if (x == -1)
938 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000939 a->ob_shash = x;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000940 return x;
941}
942
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000943static int
Fred Drakeba096332000-07-09 07:04:36 +0000944string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000945{
946 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000947 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000948 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000949 return -1;
950 }
951 *ptr = (void *)self->ob_sval;
952 return self->ob_size;
953}
954
955static int
Fred Drakeba096332000-07-09 07:04:36 +0000956string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000957{
Guido van Rossum045e6881997-09-08 18:30:11 +0000958 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000959 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000960 return -1;
961}
962
963static int
Fred Drakeba096332000-07-09 07:04:36 +0000964string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000965{
966 if ( lenp )
967 *lenp = self->ob_size;
968 return 1;
969}
970
Guido van Rossum1db70701998-10-08 02:18:52 +0000971static int
Fred Drakeba096332000-07-09 07:04:36 +0000972string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000973{
974 if ( index != 0 ) {
975 PyErr_SetString(PyExc_SystemError,
976 "accessing non-existent string segment");
977 return -1;
978 }
979 *ptr = self->ob_sval;
980 return self->ob_size;
981}
982
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000983static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000984 (inquiry)string_length, /*sq_length*/
985 (binaryfunc)string_concat, /*sq_concat*/
986 (intargfunc)string_repeat, /*sq_repeat*/
987 (intargfunc)string_item, /*sq_item*/
988 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000989 0, /*sq_ass_item*/
990 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000991 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000992};
993
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000994static PyBufferProcs string_as_buffer = {
995 (getreadbufferproc)string_buffer_getreadbuf,
996 (getwritebufferproc)string_buffer_getwritebuf,
997 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000998 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000999};
1000
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001001
1002
/* Strip modes: which end(s) of the string get stripped. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip modes above. */
static const char *stripformat[] = {
    "|O:lstrip",    /* LEFTSTRIP */
    "|O:rstrip",    /* RIGHTSTRIP */
    "|O:strip"      /* BOTHSTRIP */
};

/* Method name for strip mode i: the format string past its "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001011
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001012
1013static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +00001014split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001015{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001016 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001017 PyObject* item;
1018 PyObject *list = PyList_New(0);
1019
1020 if (list == NULL)
1021 return NULL;
1022
Guido van Rossum4c08d552000-03-10 22:55:18 +00001023 for (i = j = 0; i < len; ) {
1024 while (i < len && isspace(Py_CHARMASK(s[i])))
1025 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001026 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001027 while (i < len && !isspace(Py_CHARMASK(s[i])))
1028 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001029 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001030 if (maxsplit-- <= 0)
1031 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001032 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1033 if (item == NULL)
1034 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001035 err = PyList_Append(list, item);
1036 Py_DECREF(item);
1037 if (err < 0)
1038 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001039 while (i < len && isspace(Py_CHARMASK(s[i])))
1040 i++;
1041 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001042 }
1043 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001044 if (j < len) {
1045 item = PyString_FromStringAndSize(s+j, (int)(len - j));
1046 if (item == NULL)
1047 goto finally;
1048 err = PyList_Append(list, item);
1049 Py_DECREF(item);
1050 if (err < 0)
1051 goto finally;
1052 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001053 return list;
1054 finally:
1055 Py_DECREF(list);
1056 return NULL;
1057}
1058
1059
/* Docstring for S.split(). */
static char split__doc__[] =
"S.split([sep [,maxsplit]]) -> list of strings\n"
"\n"
"Return a list of the words in the string S, using sep as the\n"
"delimiter string. If maxsplit is given, at most maxsplit\n"
"splits are done. If sep is not specified, any whitespace string\n"
"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001067
1068static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001069string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001070{
1071 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001072 int maxsplit = -1;
1073 const char *s = PyString_AS_STRING(self), *sub;
1074 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001075
Guido van Rossum4c08d552000-03-10 22:55:18 +00001076 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001077 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001078 if (maxsplit < 0)
1079 maxsplit = INT_MAX;
1080 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001081 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001082 if (PyString_Check(subobj)) {
1083 sub = PyString_AS_STRING(subobj);
1084 n = PyString_GET_SIZE(subobj);
1085 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001086#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001087 else if (PyUnicode_Check(subobj))
1088 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001089#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001090 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1091 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001092 if (n == 0) {
1093 PyErr_SetString(PyExc_ValueError, "empty separator");
1094 return NULL;
1095 }
1096
1097 list = PyList_New(0);
1098 if (list == NULL)
1099 return NULL;
1100
1101 i = j = 0;
1102 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +00001103 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00001104 if (maxsplit-- <= 0)
1105 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001106 item = PyString_FromStringAndSize(s+j, (int)(i-j));
1107 if (item == NULL)
1108 goto fail;
1109 err = PyList_Append(list, item);
1110 Py_DECREF(item);
1111 if (err < 0)
1112 goto fail;
1113 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001114 }
1115 else
1116 i++;
1117 }
1118 item = PyString_FromStringAndSize(s+j, (int)(len-j));
1119 if (item == NULL)
1120 goto fail;
1121 err = PyList_Append(list, item);
1122 Py_DECREF(item);
1123 if (err < 0)
1124 goto fail;
1125
1126 return list;
1127
1128 fail:
1129 Py_DECREF(list);
1130 return NULL;
1131}
1132
1133
/* Docstring for S.join(). */
static char join__doc__[] =
"S.join(sequence) -> string\n"
"\n"
"Return a string which is the concatenation of the strings in the\n"
"sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001139
1140static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001141string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001142{
1143 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +00001144 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001145 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001146 char *p;
1147 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +00001148 size_t sz = 0;
1149 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001150 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001151
Tim Peters19fe14e2001-01-19 03:03:47 +00001152 seq = PySequence_Fast(orig, "");
1153 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001154 if (PyErr_ExceptionMatches(PyExc_TypeError))
1155 PyErr_Format(PyExc_TypeError,
1156 "sequence expected, %.80s found",
1157 orig->ob_type->tp_name);
1158 return NULL;
1159 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001160
Jeremy Hylton03657cf2000-07-12 13:05:33 +00001161 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +00001162 if (seqlen == 0) {
1163 Py_DECREF(seq);
1164 return PyString_FromString("");
1165 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001166 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001167 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +00001168 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
1169 PyErr_Format(PyExc_TypeError,
1170 "sequence item 0: expected string,"
1171 " %.80s found",
1172 item->ob_type->tp_name);
1173 Py_DECREF(seq);
1174 return NULL;
1175 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001176 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +00001177 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001178 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001179 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001180
Tim Peters19fe14e2001-01-19 03:03:47 +00001181 /* There are at least two things to join. Do a pre-pass to figure out
1182 * the total amount of space we'll need (sz), see whether any argument
1183 * is absurd, and defer to the Unicode join if appropriate.
1184 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001185 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +00001186 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001187 item = PySequence_Fast_GET_ITEM(seq, i);
1188 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001189#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001190 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +00001191 /* Defer to Unicode join.
1192 * CAUTION: There's no gurantee that the
1193 * original sequence can be iterated over
1194 * again, so we must pass seq here.
1195 */
1196 PyObject *result;
1197 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001198 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001199 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001200 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001201#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001202 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001203 "sequence item %i: expected string,"
1204 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001205 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001206 Py_DECREF(seq);
1207 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001208 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001209 sz += PyString_GET_SIZE(item);
1210 if (i != 0)
1211 sz += seplen;
1212 if (sz < old_sz || sz > INT_MAX) {
1213 PyErr_SetString(PyExc_OverflowError,
1214 "join() is too long for a Python string");
1215 Py_DECREF(seq);
1216 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001217 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001218 }
1219
1220 /* Allocate result space. */
1221 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1222 if (res == NULL) {
1223 Py_DECREF(seq);
1224 return NULL;
1225 }
1226
1227 /* Catenate everything. */
1228 p = PyString_AS_STRING(res);
1229 for (i = 0; i < seqlen; ++i) {
1230 size_t n;
1231 item = PySequence_Fast_GET_ITEM(seq, i);
1232 n = PyString_GET_SIZE(item);
1233 memcpy(p, PyString_AS_STRING(item), n);
1234 p += n;
1235 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001236 memcpy(p, sep, seplen);
1237 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001238 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001239 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001240
Jeremy Hylton49048292000-07-11 03:28:17 +00001241 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001243}
1244
Tim Peters52e155e2001-06-16 05:42:57 +00001245PyObject *
1246_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001247{
Tim Petersa7259592001-06-16 05:11:17 +00001248 assert(sep != NULL && PyString_Check(sep));
1249 assert(x != NULL);
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001250 return string_join((PyStringObject *)sep, x);
Tim Petersa7259592001-06-16 05:11:17 +00001251}
1252
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001253static long
Fred Drakeba096332000-07-09 07:04:36 +00001254string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001255{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001256 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001257 int len = PyString_GET_SIZE(self);
1258 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001259 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001261 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001262 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001263 return -2;
1264 if (PyString_Check(subobj)) {
1265 sub = PyString_AS_STRING(subobj);
1266 n = PyString_GET_SIZE(subobj);
1267 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001268#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001269 else if (PyUnicode_Check(subobj))
1270 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001271#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001272 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001273 return -2;
1274
1275 if (last > len)
1276 last = len;
1277 if (last < 0)
1278 last += len;
1279 if (last < 0)
1280 last = 0;
1281 if (i < 0)
1282 i += len;
1283 if (i < 0)
1284 i = 0;
1285
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286 if (dir > 0) {
1287 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001288 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001289 last -= n;
1290 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001291 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001292 return (long)i;
1293 }
1294 else {
1295 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001296
Guido van Rossum4c08d552000-03-10 22:55:18 +00001297 if (n == 0 && i <= last)
1298 return (long)last;
1299 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001300 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001301 return (long)j;
1302 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001303
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001304 return -1;
1305}
1306
1307
1308static char find__doc__[] =
1309"S.find(sub [,start [,end]]) -> int\n\
1310\n\
1311Return the lowest index in S where substring sub is found,\n\
1312such that sub is contained within s[start,end]. Optional\n\
1313arguments start and end are interpreted as in slice notation.\n\
1314\n\
1315Return -1 on failure.";
1316
1317static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001318string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001319{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001320 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321 if (result == -2)
1322 return NULL;
1323 return PyInt_FromLong(result);
1324}
1325
1326
1327static char index__doc__[] =
1328"S.index(sub [,start [,end]]) -> int\n\
1329\n\
1330Like S.find() but raise ValueError when the substring is not found.";
1331
1332static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001333string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001334{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001335 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001336 if (result == -2)
1337 return NULL;
1338 if (result == -1) {
1339 PyErr_SetString(PyExc_ValueError,
1340 "substring not found in string.index");
1341 return NULL;
1342 }
1343 return PyInt_FromLong(result);
1344}
1345
1346
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347static char rfind__doc__[] =
1348"S.rfind(sub [,start [,end]]) -> int\n\
1349\n\
1350Return the highest index in S where substring sub is found,\n\
1351such that sub is contained within s[start,end]. Optional\n\
1352arguments start and end are interpreted as in slice notation.\n\
1353\n\
1354Return -1 on failure.";
1355
1356static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001357string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001359 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360 if (result == -2)
1361 return NULL;
1362 return PyInt_FromLong(result);
1363}
1364
1365
1366static char rindex__doc__[] =
1367"S.rindex(sub [,start [,end]]) -> int\n\
1368\n\
1369Like S.rfind() but raise ValueError when the substring is not found.";
1370
1371static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001372string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001373{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001374 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375 if (result == -2)
1376 return NULL;
1377 if (result == -1) {
1378 PyErr_SetString(PyExc_ValueError,
1379 "substring not found in string.rindex");
1380 return NULL;
1381 }
1382 return PyInt_FromLong(result);
1383}
1384
1385
1386static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001387do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1388{
1389 char *s = PyString_AS_STRING(self);
1390 int len = PyString_GET_SIZE(self);
1391 char *sep = PyString_AS_STRING(sepobj);
1392 int seplen = PyString_GET_SIZE(sepobj);
1393 int i, j;
1394
1395 i = 0;
1396 if (striptype != RIGHTSTRIP) {
1397 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1398 i++;
1399 }
1400 }
1401
1402 j = len;
1403 if (striptype != LEFTSTRIP) {
1404 do {
1405 j--;
1406 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1407 j++;
1408 }
1409
1410 if (i == 0 && j == len && PyString_CheckExact(self)) {
1411 Py_INCREF(self);
1412 return (PyObject*)self;
1413 }
1414 else
1415 return PyString_FromStringAndSize(s+i, j-i);
1416}
1417
1418
1419static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001420do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001421{
1422 char *s = PyString_AS_STRING(self);
1423 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 i = 0;
1426 if (striptype != RIGHTSTRIP) {
1427 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1428 i++;
1429 }
1430 }
1431
1432 j = len;
1433 if (striptype != LEFTSTRIP) {
1434 do {
1435 j--;
1436 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1437 j++;
1438 }
1439
Tim Peters8fa5dd02001-09-12 02:18:30 +00001440 if (i == 0 && j == len && PyString_CheckExact(self)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441 Py_INCREF(self);
1442 return (PyObject*)self;
1443 }
1444 else
1445 return PyString_FromStringAndSize(s+i, j-i);
1446}
1447
1448
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001449static PyObject *
1450do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1451{
1452 PyObject *sep = NULL;
1453
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001454 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001455 return NULL;
1456
1457 if (sep != NULL && sep != Py_None) {
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001458 if (PyString_Check(sep))
1459 return do_xstrip(self, striptype, sep);
1460 else if (PyUnicode_Check(sep)) {
1461 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1462 PyObject *res;
1463 if (uniself==NULL)
1464 return NULL;
1465 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1466 striptype, sep);
1467 Py_DECREF(uniself);
1468 return res;
1469 }
1470 else {
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001471 PyErr_Format(PyExc_TypeError,
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001472 "%s arg must be None, str or unicode",
1473 STRIPNAME(striptype));
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001474 return NULL;
1475 }
1476 return do_xstrip(self, striptype, sep);
1477 }
1478
1479 return do_strip(self, striptype);
1480}
1481
1482
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001483static char strip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001484"S.strip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001485\n\
1486Return a copy of the string S with leading and trailing\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001487whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001488If sep is given and not None, remove characters in sep instead.\n\
1489If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001490
1491static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001492string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001493{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001494 if (PyTuple_GET_SIZE(args) == 0)
1495 return do_strip(self, BOTHSTRIP); /* Common case */
1496 else
1497 return do_argstrip(self, BOTHSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001498}
1499
1500
1501static char lstrip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001502"S.lstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001503\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001504Return a copy of the string S with leading whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001505If sep is given and not None, remove characters in sep instead.\n\
1506If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001507
1508static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001509string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001510{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001511 if (PyTuple_GET_SIZE(args) == 0)
1512 return do_strip(self, LEFTSTRIP); /* Common case */
1513 else
1514 return do_argstrip(self, LEFTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001515}
1516
1517
1518static char rstrip__doc__[] =
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001519"S.rstrip([sep]) -> string or unicode\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001520\n\
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001521Return a copy of the string S with trailing whitespace removed.\n\
Walter Dörwaldde02bcb2002-04-22 17:42:37 +00001522If sep is given and not None, remove characters in sep instead.\n\
1523If sep is unicode, S will be converted to unicode before stripping";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001524
1525static PyObject *
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001526string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527{
Guido van Rossum018b0eb2002-04-13 00:56:08 +00001528 if (PyTuple_GET_SIZE(args) == 0)
1529 return do_strip(self, RIGHTSTRIP); /* Common case */
1530 else
1531 return do_argstrip(self, RIGHTSTRIP, args);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001532}
1533
1534
1535static char lower__doc__[] =
1536"S.lower() -> string\n\
1537\n\
1538Return a copy of the string S converted to lowercase.";
1539
1540static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001541string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001542{
1543 char *s = PyString_AS_STRING(self), *s_new;
1544 int i, n = PyString_GET_SIZE(self);
1545 PyObject *new;
1546
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001547 new = PyString_FromStringAndSize(NULL, n);
1548 if (new == NULL)
1549 return NULL;
1550 s_new = PyString_AsString(new);
1551 for (i = 0; i < n; i++) {
1552 int c = Py_CHARMASK(*s++);
1553 if (isupper(c)) {
1554 *s_new = tolower(c);
1555 } else
1556 *s_new = c;
1557 s_new++;
1558 }
1559 return new;
1560}
1561
1562
1563static char upper__doc__[] =
1564"S.upper() -> string\n\
1565\n\
1566Return a copy of the string S converted to uppercase.";
1567
1568static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001569string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001570{
1571 char *s = PyString_AS_STRING(self), *s_new;
1572 int i, n = PyString_GET_SIZE(self);
1573 PyObject *new;
1574
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001575 new = PyString_FromStringAndSize(NULL, n);
1576 if (new == NULL)
1577 return NULL;
1578 s_new = PyString_AsString(new);
1579 for (i = 0; i < n; i++) {
1580 int c = Py_CHARMASK(*s++);
1581 if (islower(c)) {
1582 *s_new = toupper(c);
1583 } else
1584 *s_new = c;
1585 s_new++;
1586 }
1587 return new;
1588}
1589
1590
Guido van Rossum4c08d552000-03-10 22:55:18 +00001591static char title__doc__[] =
1592"S.title() -> string\n\
1593\n\
1594Return a titlecased version of S, i.e. words start with uppercase\n\
1595characters, all remaining cased characters have lowercase.";
1596
1597static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001598string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001599{
1600 char *s = PyString_AS_STRING(self), *s_new;
1601 int i, n = PyString_GET_SIZE(self);
1602 int previous_is_cased = 0;
1603 PyObject *new;
1604
Guido van Rossum4c08d552000-03-10 22:55:18 +00001605 new = PyString_FromStringAndSize(NULL, n);
1606 if (new == NULL)
1607 return NULL;
1608 s_new = PyString_AsString(new);
1609 for (i = 0; i < n; i++) {
1610 int c = Py_CHARMASK(*s++);
1611 if (islower(c)) {
1612 if (!previous_is_cased)
1613 c = toupper(c);
1614 previous_is_cased = 1;
1615 } else if (isupper(c)) {
1616 if (previous_is_cased)
1617 c = tolower(c);
1618 previous_is_cased = 1;
1619 } else
1620 previous_is_cased = 0;
1621 *s_new++ = c;
1622 }
1623 return new;
1624}
1625
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001626static char capitalize__doc__[] =
1627"S.capitalize() -> string\n\
1628\n\
1629Return a copy of the string S with only its first character\n\
1630capitalized.";
1631
1632static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001633string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001634{
1635 char *s = PyString_AS_STRING(self), *s_new;
1636 int i, n = PyString_GET_SIZE(self);
1637 PyObject *new;
1638
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639 new = PyString_FromStringAndSize(NULL, n);
1640 if (new == NULL)
1641 return NULL;
1642 s_new = PyString_AsString(new);
1643 if (0 < n) {
1644 int c = Py_CHARMASK(*s++);
1645 if (islower(c))
1646 *s_new = toupper(c);
1647 else
1648 *s_new = c;
1649 s_new++;
1650 }
1651 for (i = 1; i < n; i++) {
1652 int c = Py_CHARMASK(*s++);
1653 if (isupper(c))
1654 *s_new = tolower(c);
1655 else
1656 *s_new = c;
1657 s_new++;
1658 }
1659 return new;
1660}
1661
1662
1663static char count__doc__[] =
1664"S.count(sub[, start[, end]]) -> int\n\
1665\n\
1666Return the number of occurrences of substring sub in string\n\
1667S[start:end]. Optional arguments start and end are\n\
1668interpreted as in slice notation.";
1669
1670static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001671string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001673 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001674 int len = PyString_GET_SIZE(self), n;
1675 int i = 0, last = INT_MAX;
1676 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001677 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001678
Guido van Rossumc6821402000-05-08 14:08:05 +00001679 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1680 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001681 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001682
Guido van Rossum4c08d552000-03-10 22:55:18 +00001683 if (PyString_Check(subobj)) {
1684 sub = PyString_AS_STRING(subobj);
1685 n = PyString_GET_SIZE(subobj);
1686 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001687#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001688 else if (PyUnicode_Check(subobj)) {
1689 int count;
1690 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1691 if (count == -1)
1692 return NULL;
1693 else
1694 return PyInt_FromLong((long) count);
1695 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001696#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001697 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1698 return NULL;
1699
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001700 if (last > len)
1701 last = len;
1702 if (last < 0)
1703 last += len;
1704 if (last < 0)
1705 last = 0;
1706 if (i < 0)
1707 i += len;
1708 if (i < 0)
1709 i = 0;
1710 m = last + 1 - n;
1711 if (n == 0)
1712 return PyInt_FromLong((long) (m-i));
1713
1714 r = 0;
1715 while (i < m) {
1716 if (!memcmp(s+i, sub, n)) {
1717 r++;
1718 i += n;
1719 } else {
1720 i++;
1721 }
1722 }
1723 return PyInt_FromLong((long) r);
1724}
1725
1726
1727static char swapcase__doc__[] =
1728"S.swapcase() -> string\n\
1729\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001730Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001731converted to lowercase and vice versa.";
1732
1733static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001734string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735{
1736 char *s = PyString_AS_STRING(self), *s_new;
1737 int i, n = PyString_GET_SIZE(self);
1738 PyObject *new;
1739
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740 new = PyString_FromStringAndSize(NULL, n);
1741 if (new == NULL)
1742 return NULL;
1743 s_new = PyString_AsString(new);
1744 for (i = 0; i < n; i++) {
1745 int c = Py_CHARMASK(*s++);
1746 if (islower(c)) {
1747 *s_new = toupper(c);
1748 }
1749 else if (isupper(c)) {
1750 *s_new = tolower(c);
1751 }
1752 else
1753 *s_new = c;
1754 s_new++;
1755 }
1756 return new;
1757}
1758
1759
1760static char translate__doc__[] =
1761"S.translate(table [,deletechars]) -> string\n\
1762\n\
1763Return a copy of the string S, where all characters occurring\n\
1764in the optional argument deletechars are removed, and the\n\
1765remaining characters have been mapped through the given\n\
1766translation table, which must be a string of length 256.";
1767
1768static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001769string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001771 register char *input, *output;
1772 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001773 register int i, c, changed = 0;
1774 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001775 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776 int inlen, tablen, dellen = 0;
1777 PyObject *result;
1778 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001779 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001780
Guido van Rossum4c08d552000-03-10 22:55:18 +00001781 if (!PyArg_ParseTuple(args, "O|O:translate",
1782 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001783 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001784
1785 if (PyString_Check(tableobj)) {
1786 table1 = PyString_AS_STRING(tableobj);
1787 tablen = PyString_GET_SIZE(tableobj);
1788 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001789#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001790 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001791 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001792 parameter; instead a mapping to None will cause characters
1793 to be deleted. */
1794 if (delobj != NULL) {
1795 PyErr_SetString(PyExc_TypeError,
1796 "deletions are implemented differently for unicode");
1797 return NULL;
1798 }
1799 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1800 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001801#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001802 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001803 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001804
1805 if (delobj != NULL) {
1806 if (PyString_Check(delobj)) {
1807 del_table = PyString_AS_STRING(delobj);
1808 dellen = PyString_GET_SIZE(delobj);
1809 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001810#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001811 else if (PyUnicode_Check(delobj)) {
1812 PyErr_SetString(PyExc_TypeError,
1813 "deletions are implemented differently for unicode");
1814 return NULL;
1815 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001816#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001817 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1818 return NULL;
1819
1820 if (tablen != 256) {
1821 PyErr_SetString(PyExc_ValueError,
1822 "translation table must be 256 characters long");
1823 return NULL;
1824 }
1825 }
1826 else {
1827 del_table = NULL;
1828 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001829 }
1830
1831 table = table1;
1832 inlen = PyString_Size(input_obj);
1833 result = PyString_FromStringAndSize((char *)NULL, inlen);
1834 if (result == NULL)
1835 return NULL;
1836 output_start = output = PyString_AsString(result);
1837 input = PyString_AsString(input_obj);
1838
1839 if (dellen == 0) {
1840 /* If no deletions are required, use faster code */
1841 for (i = inlen; --i >= 0; ) {
1842 c = Py_CHARMASK(*input++);
1843 if (Py_CHARMASK((*output++ = table[c])) != c)
1844 changed = 1;
1845 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001846 if (changed || !PyString_CheckExact(input_obj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001847 return result;
1848 Py_DECREF(result);
1849 Py_INCREF(input_obj);
1850 return input_obj;
1851 }
1852
1853 for (i = 0; i < 256; i++)
1854 trans_table[i] = Py_CHARMASK(table[i]);
1855
1856 for (i = 0; i < dellen; i++)
1857 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1858
1859 for (i = inlen; --i >= 0; ) {
1860 c = Py_CHARMASK(*input++);
1861 if (trans_table[c] != -1)
1862 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1863 continue;
1864 changed = 1;
1865 }
Tim Peters8fa5dd02001-09-12 02:18:30 +00001866 if (!changed && PyString_CheckExact(input_obj)) {
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867 Py_DECREF(result);
1868 Py_INCREF(input_obj);
1869 return input_obj;
1870 }
1871 /* Fix the size of the resulting string */
1872 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1873 return NULL;
1874 return result;
1875}
1876
1877
1878/* What follows is used for implementing replace(). Perry Stoll. */
1879
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Locates the first occurrence in the LEN bytes pointed to by MEM of the
  PAT_LEN bytes pointed to by PAT.  Returns the index into MEM if
  found, or -1 if not found.  If PAT is longer than MEM, or PAT is
  empty, the function returns -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    register int ii;

    /* An empty pattern has no first byte to compare against; report
       "not found" rather than reading pat[0]. */
    if (pat_len <= 0)
        return -1;

    /* pattern can not occur in the last pat_len-1 chars */
    len -= pat_len;

    for (ii = 0; ii <= len; ii++) {
        /* cheap first-byte test before the full comparison */
        if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
            return ii;
        }
    }
    return -1;
}
1905
/*
  mymemcnt

  Count the occurrences of PAT in MEM, resuming each search just past
  the previous match, so matches never overlap:
  mem=1111 and pat==11 returns 2.
  mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int hits = 0;

    while (len >= 0) {
        int at = mymemfind(mem, len, pat, pat_len);
        int consumed;

        if (at == -1)
            break;
        /* step over the unmatched prefix and the match itself */
        consumed = at + pat_len;
        mem += consumed;
        len -= consumed;
        hits++;
    }
    return hits;
}
1929
/*
  mymemreplace

  Return a string in which occurrences of PAT in memory STR are
  replaced with SUB.  At most COUNT occurrences are replaced; a
  negative COUNT means no limit.

  If length of PAT is greater than length of STR, STR is empty, or
  there are no occurrences of PAT in STR, then the original string is
  returned.  Otherwise, a new string is allocated here with
  PyMem_MALLOC and returned; the caller must release it.

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs (no memory, or the result would not
      fit in an int).

  return value is:
      the new string allocated locally, or
      NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only a positive delta can push the
       result past INT_MAX; refuse instead of letting the signed
       arithmetic overflow and under-allocate the buffer. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;

    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
2014
2015
2016static char replace__doc__[] =
2017"S.replace (old, new[, maxsplit]) -> string\n\
2018\n\
2019Return a copy of string S with all occurrences of substring\n\
2020old replaced by new. If the optional argument maxsplit is\n\
2021given, only the first maxsplit occurrences are replaced.";
2022
2023static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002024string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002025{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002026 const char *str = PyString_AS_STRING(self), *sub, *repl;
2027 char *new_s;
Tim Peters8fa5dd02001-09-12 02:18:30 +00002028 const int len = PyString_GET_SIZE(self);
2029 int sub_len, repl_len, out_len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002030 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002031 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002032 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002033
Guido van Rossum4c08d552000-03-10 22:55:18 +00002034 if (!PyArg_ParseTuple(args, "OO|i:replace",
2035 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002036 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002037
2038 if (PyString_Check(subobj)) {
2039 sub = PyString_AS_STRING(subobj);
2040 sub_len = PyString_GET_SIZE(subobj);
2041 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002042#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002043 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002044 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002045 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002046#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002047 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
2048 return NULL;
2049
2050 if (PyString_Check(replobj)) {
2051 repl = PyString_AS_STRING(replobj);
2052 repl_len = PyString_GET_SIZE(replobj);
2053 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002054#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00002055 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002056 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002057 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002058#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002059 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
2060 return NULL;
2061
Guido van Rossum96a45ad2000-03-13 15:56:08 +00002062 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00002063 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002064 return NULL;
2065 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00002066 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002067 if (new_s == NULL) {
2068 PyErr_NoMemory();
2069 return NULL;
2070 }
2071 if (out_len == -1) {
Tim Peters8fa5dd02001-09-12 02:18:30 +00002072 if (PyString_CheckExact(self)) {
2073 /* we're returning another reference to self */
2074 new = (PyObject*)self;
2075 Py_INCREF(new);
2076 }
2077 else {
2078 new = PyString_FromStringAndSize(str, len);
2079 if (new == NULL)
2080 return NULL;
2081 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002082 }
2083 else {
2084 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00002085 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002086 }
2087 return new;
2088}
2089
2090
2091static char startswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002092"S.startswith(prefix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002093\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002094Return True if S starts with the specified prefix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002095optional start, test S beginning at that position. With optional end, stop\n\
2096comparing S at that position.";
2097
2098static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002099string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002100{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002101 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002102 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002103 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002104 int plen;
2105 int start = 0;
2106 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002108
Guido van Rossumc6821402000-05-08 14:08:05 +00002109 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
2110 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002111 return NULL;
2112 if (PyString_Check(subobj)) {
2113 prefix = PyString_AS_STRING(subobj);
2114 plen = PyString_GET_SIZE(subobj);
2115 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002116#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002117 else if (PyUnicode_Check(subobj)) {
2118 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002119 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002120 subobj, start, end, -1);
2121 if (rc == -1)
2122 return NULL;
2123 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002124 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002125 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002126#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002127 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002128 return NULL;
2129
2130 /* adopt Java semantics for index out of range. it is legal for
2131 * offset to be == plen, but this only returns true if prefix is
2132 * the empty string.
2133 */
2134 if (start < 0 || start+plen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002135 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002136
2137 if (!memcmp(str+start, prefix, plen)) {
2138 /* did the match end after the specified end? */
2139 if (end < 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002140 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002141 else if (end - start < plen)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002142 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002143 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002144 return PyBool_FromLong(1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002145 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002146 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002147}
2148
2149
2150static char endswith__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002151"S.endswith(suffix[, start[, end]]) -> bool\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002152\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002153Return True if S ends with the specified suffix, False otherwise. With\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002154optional start, test S beginning at that position. With optional end, stop\n\
2155comparing S at that position.";
2156
2157static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002158string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002159{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002160 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002161 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002162 const char* suffix;
2163 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002164 int start = 0;
2165 int end = -1;
2166 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002167 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002168
Guido van Rossumc6821402000-05-08 14:08:05 +00002169 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
2170 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 return NULL;
2172 if (PyString_Check(subobj)) {
2173 suffix = PyString_AS_STRING(subobj);
2174 slen = PyString_GET_SIZE(subobj);
2175 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002176#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002177 else if (PyUnicode_Check(subobj)) {
2178 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002179 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002180 subobj, start, end, +1);
2181 if (rc == -1)
2182 return NULL;
2183 else
Guido van Rossum77f6a652002-04-03 22:41:51 +00002184 return PyBool_FromLong((long) rc);
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00002185 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002186#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00002187 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002188 return NULL;
2189
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190 if (start < 0 || start > len || slen > len)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002191 return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002192
2193 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002194 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002195
Guido van Rossum4c08d552000-03-10 22:55:18 +00002196 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002197 return PyBool_FromLong(1);
2198 else return PyBool_FromLong(0);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002199}
2200
2201
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002202static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002203"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002204\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002205Encodes S using the codec registered for encoding. encoding defaults\n\
2206to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002207handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2208a ValueError. Other possible values are 'ignore' and 'replace'.";
2209
2210static PyObject *
2211string_encode(PyStringObject *self, PyObject *args)
2212{
2213 char *encoding = NULL;
2214 char *errors = NULL;
2215 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
2216 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002217 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
2218}
2219
2220
2221static char decode__doc__[] =
2222"S.decode([encoding[,errors]]) -> object\n\
2223\n\
2224Decodes S using the codec registered for encoding. encoding defaults\n\
2225to the default encoding. errors may be given to set a different error\n\
2226handling scheme. Default is 'strict' meaning that encoding errors raise\n\
2227a ValueError. Other possible values are 'ignore' and 'replace'.";
2228
2229static PyObject *
2230string_decode(PyStringObject *self, PyObject *args)
2231{
2232 char *encoding = NULL;
2233 char *errors = NULL;
2234 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
2235 return NULL;
2236 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002237}
2238
2239
Guido van Rossum4c08d552000-03-10 22:55:18 +00002240static char expandtabs__doc__[] =
2241"S.expandtabs([tabsize]) -> string\n\
2242\n\
2243Return a copy of S where all tab characters are expanded using spaces.\n\
2244If tabsize is not given, a tab size of 8 characters is assumed.";
2245
2246static PyObject*
2247string_expandtabs(PyStringObject *self, PyObject *args)
2248{
2249 const char *e, *p;
2250 char *q;
2251 int i, j;
2252 PyObject *u;
2253 int tabsize = 8;
2254
2255 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
2256 return NULL;
2257
Thomas Wouters7e474022000-07-16 12:04:32 +00002258 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 i = j = 0;
2260 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
2261 for (p = PyString_AS_STRING(self); p < e; p++)
2262 if (*p == '\t') {
2263 if (tabsize > 0)
2264 j += tabsize - (j % tabsize);
2265 }
2266 else {
2267 j++;
2268 if (*p == '\n' || *p == '\r') {
2269 i += j;
2270 j = 0;
2271 }
2272 }
2273
2274 /* Second pass: create output string and fill it */
2275 u = PyString_FromStringAndSize(NULL, i + j);
2276 if (!u)
2277 return NULL;
2278
2279 j = 0;
2280 q = PyString_AS_STRING(u);
2281
2282 for (p = PyString_AS_STRING(self); p < e; p++)
2283 if (*p == '\t') {
2284 if (tabsize > 0) {
2285 i = tabsize - (j % tabsize);
2286 j += i;
2287 while (i--)
2288 *q++ = ' ';
2289 }
2290 }
2291 else {
2292 j++;
2293 *q++ = *p;
2294 if (*p == '\n' || *p == '\r')
2295 j = 0;
2296 }
2297
2298 return u;
2299}
2300
Tim Peters8fa5dd02001-09-12 02:18:30 +00002301static PyObject *
2302pad(PyStringObject *self, int left, int right, char fill)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002303{
2304 PyObject *u;
2305
2306 if (left < 0)
2307 left = 0;
2308 if (right < 0)
2309 right = 0;
2310
Tim Peters8fa5dd02001-09-12 02:18:30 +00002311 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312 Py_INCREF(self);
2313 return (PyObject *)self;
2314 }
2315
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002316 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002317 left + PyString_GET_SIZE(self) + right);
2318 if (u) {
2319 if (left)
2320 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002321 memcpy(PyString_AS_STRING(u) + left,
2322 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002323 PyString_GET_SIZE(self));
2324 if (right)
2325 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2326 fill, right);
2327 }
2328
2329 return u;
2330}
2331
2332static char ljust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002333"S.ljust(width) -> string\n"
2334"\n"
2335"Return S left justified in a string of length width. Padding is\n"
2336"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337
2338static PyObject *
2339string_ljust(PyStringObject *self, PyObject *args)
2340{
2341 int width;
2342 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2343 return NULL;
2344
Tim Peters8fa5dd02001-09-12 02:18:30 +00002345 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002346 Py_INCREF(self);
2347 return (PyObject*) self;
2348 }
2349
2350 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2351}
2352
2353
2354static char rjust__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002355"S.rjust(width) -> string\n"
2356"\n"
2357"Return S right justified in a string of length width. Padding is\n"
2358"done using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359
2360static PyObject *
2361string_rjust(PyStringObject *self, PyObject *args)
2362{
2363 int width;
2364 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2365 return NULL;
2366
Tim Peters8fa5dd02001-09-12 02:18:30 +00002367 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 Py_INCREF(self);
2369 return (PyObject*) self;
2370 }
2371
2372 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2373}
2374
2375
2376static char center__doc__[] =
Tim Peters8fa5dd02001-09-12 02:18:30 +00002377"S.center(width) -> string\n"
2378"\n"
2379"Return S centered in a string of length width. Padding is done\n"
2380"using spaces.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002381
2382static PyObject *
2383string_center(PyStringObject *self, PyObject *args)
2384{
2385 int marg, left;
2386 int width;
2387
2388 if (!PyArg_ParseTuple(args, "i:center", &width))
2389 return NULL;
2390
Tim Peters8fa5dd02001-09-12 02:18:30 +00002391 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392 Py_INCREF(self);
2393 return (PyObject*) self;
2394 }
2395
2396 marg = width - PyString_GET_SIZE(self);
2397 left = marg / 2 + (marg & width & 1);
2398
2399 return pad(self, left, marg - left, ' ');
2400}
2401
Walter Dörwald068325e2002-04-15 13:36:47 +00002402static char zfill__doc__[] =
2403"S.zfill(width) -> string\n"
2404"\n"
2405"Pad a numeric string S with zeros on the left, to fill a field\n"
2406"of the specified width. The string S is never truncated.";
2407
2408static PyObject *
2409string_zfill(PyStringObject *self, PyObject *args)
2410{
2411 int fill;
2412 PyObject *s;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002413 char *p;
Walter Dörwald068325e2002-04-15 13:36:47 +00002414
2415 int width;
2416 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2417 return NULL;
2418
2419 if (PyString_GET_SIZE(self) >= width) {
Walter Dörwald0fe940c2002-04-15 18:42:15 +00002420 if (PyString_CheckExact(self)) {
2421 Py_INCREF(self);
2422 return (PyObject*) self;
2423 }
2424 else
2425 return PyString_FromStringAndSize(
2426 PyString_AS_STRING(self),
2427 PyString_GET_SIZE(self)
2428 );
Walter Dörwald068325e2002-04-15 13:36:47 +00002429 }
2430
2431 fill = width - PyString_GET_SIZE(self);
2432
2433 s = pad(self, fill, 0, '0');
2434
2435 if (s == NULL)
2436 return NULL;
2437
2438 p = PyString_AS_STRING(s);
2439 if (p[fill] == '+' || p[fill] == '-') {
2440 /* move sign to beginning of string */
2441 p[0] = p[fill];
2442 p[fill] = '0';
2443 }
2444
2445 return (PyObject*) s;
2446}
2447
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448static char isspace__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002449"S.isspace() -> bool\n"
Tim Peters8fa5dd02001-09-12 02:18:30 +00002450"\n"
Guido van Rossum77f6a652002-04-03 22:41:51 +00002451"Return True if there are only whitespace characters in S,\n"
2452"False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002453
2454static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002455string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002456{
Fred Drakeba096332000-07-09 07:04:36 +00002457 register const unsigned char *p
2458 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002459 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002460
Guido van Rossum4c08d552000-03-10 22:55:18 +00002461 /* Shortcut for single character strings */
2462 if (PyString_GET_SIZE(self) == 1 &&
2463 isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002464 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002466 /* Special case for empty strings */
2467 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002468 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002469
Guido van Rossum4c08d552000-03-10 22:55:18 +00002470 e = p + PyString_GET_SIZE(self);
2471 for (; p < e; p++) {
2472 if (!isspace(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002473 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002474 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002475 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002476}
2477
2478
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002479static char isalpha__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002480"S.isalpha() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002481\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002482Return True if all characters in S are alphabetic\n\
2483and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002484
2485static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002486string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002487{
Fred Drakeba096332000-07-09 07:04:36 +00002488 register const unsigned char *p
2489 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002490 register const unsigned char *e;
2491
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002492 /* Shortcut for single character strings */
2493 if (PyString_GET_SIZE(self) == 1 &&
2494 isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002495 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002496
2497 /* Special case for empty strings */
2498 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002499 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002500
2501 e = p + PyString_GET_SIZE(self);
2502 for (; p < e; p++) {
2503 if (!isalpha(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002504 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002505 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002506 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002507}
2508
2509
2510static char isalnum__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002511"S.isalnum() -> bool\n\
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002512\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002513Return True if all characters in S are alphanumeric\n\
2514and there is at least one character in S, False otherwise.";
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002515
2516static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002517string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002518{
Fred Drakeba096332000-07-09 07:04:36 +00002519 register const unsigned char *p
2520 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002521 register const unsigned char *e;
2522
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002523 /* Shortcut for single character strings */
2524 if (PyString_GET_SIZE(self) == 1 &&
2525 isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002526 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002527
2528 /* Special case for empty strings */
2529 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002530 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002531
2532 e = p + PyString_GET_SIZE(self);
2533 for (; p < e; p++) {
2534 if (!isalnum(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002535 return PyBool_FromLong(0);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002536 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002537 return PyBool_FromLong(1);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002538}
2539
2540
Guido van Rossum4c08d552000-03-10 22:55:18 +00002541static char isdigit__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002542"S.isdigit() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002543\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002544Return True if there are only digit characters in S,\n\
2545False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002546
2547static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002548string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002549{
Fred Drakeba096332000-07-09 07:04:36 +00002550 register const unsigned char *p
2551 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002552 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002553
Guido van Rossum4c08d552000-03-10 22:55:18 +00002554 /* Shortcut for single character strings */
2555 if (PyString_GET_SIZE(self) == 1 &&
2556 isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002557 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002558
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002559 /* Special case for empty strings */
2560 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002561 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002562
Guido van Rossum4c08d552000-03-10 22:55:18 +00002563 e = p + PyString_GET_SIZE(self);
2564 for (; p < e; p++) {
2565 if (!isdigit(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002566 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002567 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002568 return PyBool_FromLong(1);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002569}
2570
2571
2572static char islower__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002573"S.islower() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002574\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002575Return True if all cased characters in S are lowercase and there is\n\
2576at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002577
2578static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002579string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002580{
Fred Drakeba096332000-07-09 07:04:36 +00002581 register const unsigned char *p
2582 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002583 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002584 int cased;
2585
Guido van Rossum4c08d552000-03-10 22:55:18 +00002586 /* Shortcut for single character strings */
2587 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002588 return PyBool_FromLong(islower(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002589
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002590 /* Special case for empty strings */
2591 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002592 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002593
Guido van Rossum4c08d552000-03-10 22:55:18 +00002594 e = p + PyString_GET_SIZE(self);
2595 cased = 0;
2596 for (; p < e; p++) {
2597 if (isupper(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002598 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002599 else if (!cased && islower(*p))
2600 cased = 1;
2601 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002602 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002603}
2604
2605
2606static char isupper__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002607"S.isupper() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002608\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002609Return True if all cased characters in S are uppercase and there is\n\
2610at least one cased character in S, False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002611
2612static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002613string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002614{
Fred Drakeba096332000-07-09 07:04:36 +00002615 register const unsigned char *p
2616 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002617 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002618 int cased;
2619
Guido van Rossum4c08d552000-03-10 22:55:18 +00002620 /* Shortcut for single character strings */
2621 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002622 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002623
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002624 /* Special case for empty strings */
2625 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002626 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002627
Guido van Rossum4c08d552000-03-10 22:55:18 +00002628 e = p + PyString_GET_SIZE(self);
2629 cased = 0;
2630 for (; p < e; p++) {
2631 if (islower(*p))
Guido van Rossum77f6a652002-04-03 22:41:51 +00002632 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002633 else if (!cased && isupper(*p))
2634 cased = 1;
2635 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002636 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002637}
2638
2639
2640static char istitle__doc__[] =
Guido van Rossum77f6a652002-04-03 22:41:51 +00002641"S.istitle() -> bool\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002642\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002643Return True if S is a titlecased string, i.e. uppercase characters\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002644may only follow uncased characters and lowercase characters only cased\n\
Guido van Rossum77f6a652002-04-03 22:41:51 +00002645ones. Return False otherwise.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002646
2647static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002648string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002649{
Fred Drakeba096332000-07-09 07:04:36 +00002650 register const unsigned char *p
2651 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002652 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002653 int cased, previous_is_cased;
2654
Guido van Rossum4c08d552000-03-10 22:55:18 +00002655 /* Shortcut for single character strings */
2656 if (PyString_GET_SIZE(self) == 1)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002657 return PyBool_FromLong(isupper(*p) != 0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002658
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002659 /* Special case for empty strings */
2660 if (PyString_GET_SIZE(self) == 0)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002661 return PyBool_FromLong(0);
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002662
Guido van Rossum4c08d552000-03-10 22:55:18 +00002663 e = p + PyString_GET_SIZE(self);
2664 cased = 0;
2665 previous_is_cased = 0;
2666 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002667 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002668
2669 if (isupper(ch)) {
2670 if (previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002671 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002672 previous_is_cased = 1;
2673 cased = 1;
2674 }
2675 else if (islower(ch)) {
2676 if (!previous_is_cased)
Guido van Rossum77f6a652002-04-03 22:41:51 +00002677 return PyBool_FromLong(0);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002678 previous_is_cased = 1;
2679 cased = 1;
2680 }
2681 else
2682 previous_is_cased = 0;
2683 }
Guido van Rossum77f6a652002-04-03 22:41:51 +00002684 return PyBool_FromLong(cased);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002685}
2686
2687
2688static char splitlines__doc__[] =
Fred Drake2bae4fa2001-10-13 15:57:55 +00002689"S.splitlines([keepends]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002690\n\
2691Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002692Line breaks are not included in the resulting list unless keepends\n\
2693is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002694
2695#define SPLIT_APPEND(data, left, right) \
2696 str = PyString_FromStringAndSize(data + left, right - left); \
2697 if (!str) \
2698 goto onError; \
2699 if (PyList_Append(list, str)) { \
2700 Py_DECREF(str); \
2701 goto onError; \
2702 } \
2703 else \
2704 Py_DECREF(str);
2705
2706static PyObject*
2707string_splitlines(PyStringObject *self, PyObject *args)
2708{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002709 register int i;
2710 register int j;
2711 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002712 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002713 PyObject *list;
2714 PyObject *str;
2715 char *data;
2716
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002717 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002718 return NULL;
2719
2720 data = PyString_AS_STRING(self);
2721 len = PyString_GET_SIZE(self);
2722
Guido van Rossum4c08d552000-03-10 22:55:18 +00002723 list = PyList_New(0);
2724 if (!list)
2725 goto onError;
2726
2727 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002728 int eol;
2729
Guido van Rossum4c08d552000-03-10 22:55:18 +00002730 /* Find a line and append it */
2731 while (i < len && data[i] != '\n' && data[i] != '\r')
2732 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002733
2734 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002735 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002736 if (i < len) {
2737 if (data[i] == '\r' && i + 1 < len &&
2738 data[i+1] == '\n')
2739 i += 2;
2740 else
2741 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002742 if (keepends)
2743 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002744 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002745 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002746 j = i;
2747 }
2748 if (j < len) {
2749 SPLIT_APPEND(data, j, len);
2750 }
2751
2752 return list;
2753
2754 onError:
2755 Py_DECREF(list);
2756 return NULL;
2757}
2758
2759#undef SPLIT_APPEND
2760
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002761
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002762static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002763string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002764 /* Counterparts of the obsolete stropmodule functions; except
2765 string.maketrans(). */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002766 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2767 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2768 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2769 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002770 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2771 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2772 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2773 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2774 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2775 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2776 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00002777 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
2778 capitalize__doc__},
2779 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2780 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
2781 endswith__doc__},
2782 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2783 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2784 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
2785 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2786 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2787 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2788 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
2789 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
2790 startswith__doc__},
2791 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
2792 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
2793 swapcase__doc__},
2794 {"translate", (PyCFunction)string_translate, METH_VARARGS,
2795 translate__doc__},
2796 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2797 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2798 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2799 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2800 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
2801 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2802 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2803 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
2804 expandtabs__doc__},
2805 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
2806 splitlines__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002807 {NULL, NULL} /* sentinel */
2808};
2809
Guido van Rossumae960af2001-08-30 03:11:59 +00002810staticforward PyObject *
2811str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2812
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002813static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002814string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002815{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002816 PyObject *x = NULL;
2817 static char *kwlist[] = {"object", 0};
2818
Guido van Rossumae960af2001-08-30 03:11:59 +00002819 if (type != &PyString_Type)
2820 return str_subtype_new(type, args, kwds);
Tim Peters6d6c1a32001-08-02 04:15:00 +00002821 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2822 return NULL;
2823 if (x == NULL)
2824 return PyString_FromString("");
2825 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002826}
2827
Guido van Rossumae960af2001-08-30 03:11:59 +00002828static PyObject *
2829str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2830{
Tim Petersaf90b3e2001-09-12 05:18:58 +00002831 PyObject *tmp, *pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002832 int n;
2833
2834 assert(PyType_IsSubtype(type, &PyString_Type));
2835 tmp = string_new(&PyString_Type, args, kwds);
2836 if (tmp == NULL)
2837 return NULL;
Tim Peters5a49ade2001-09-11 01:41:59 +00002838 assert(PyString_CheckExact(tmp));
Tim Petersaf90b3e2001-09-12 05:18:58 +00002839 n = PyString_GET_SIZE(tmp);
2840 pnew = type->tp_alloc(type, n);
2841 if (pnew != NULL) {
2842 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002843 ((PyStringObject *)pnew)->ob_shash =
2844 ((PyStringObject *)tmp)->ob_shash;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002845 ((PyStringObject *)pnew)->ob_sinterned =
2846 ((PyStringObject *)tmp)->ob_sinterned;
Tim Petersaf90b3e2001-09-12 05:18:58 +00002847 }
Guido van Rossum29d55a32001-08-31 16:11:15 +00002848 Py_DECREF(tmp);
Tim Petersaf90b3e2001-09-12 05:18:58 +00002849 return pnew;
Guido van Rossumae960af2001-08-30 03:11:59 +00002850}
2851
Tim Peters6d6c1a32001-08-02 04:15:00 +00002852static char string_doc[] =
2853"str(object) -> string\n\
2854\n\
2855Return a nice string representation of the object.\n\
2856If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002857
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002858PyTypeObject PyString_Type = {
2859 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002860 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002861 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002862 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002863 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002864 (destructor)string_dealloc, /* tp_dealloc */
2865 (printfunc)string_print, /* tp_print */
2866 0, /* tp_getattr */
2867 0, /* tp_setattr */
2868 0, /* tp_compare */
2869 (reprfunc)string_repr, /* tp_repr */
2870 0, /* tp_as_number */
2871 &string_as_sequence, /* tp_as_sequence */
2872 0, /* tp_as_mapping */
2873 (hashfunc)string_hash, /* tp_hash */
2874 0, /* tp_call */
2875 (reprfunc)string_str, /* tp_str */
2876 PyObject_GenericGetAttr, /* tp_getattro */
2877 0, /* tp_setattro */
2878 &string_as_buffer, /* tp_as_buffer */
Guido van Rossumae960af2001-08-30 03:11:59 +00002879 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
Tim Peters6d6c1a32001-08-02 04:15:00 +00002880 string_doc, /* tp_doc */
2881 0, /* tp_traverse */
2882 0, /* tp_clear */
2883 (richcmpfunc)string_richcompare, /* tp_richcompare */
2884 0, /* tp_weaklistoffset */
2885 0, /* tp_iter */
2886 0, /* tp_iternext */
2887 string_methods, /* tp_methods */
2888 0, /* tp_members */
2889 0, /* tp_getset */
2890 0, /* tp_base */
2891 0, /* tp_dict */
2892 0, /* tp_descr_get */
2893 0, /* tp_descr_set */
2894 0, /* tp_dictoffset */
2895 0, /* tp_init */
2896 0, /* tp_alloc */
2897 string_new, /* tp_new */
Neil Schemenauer510492e2002-04-12 03:05:19 +00002898 PyObject_Del, /* tp_free */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002899};
2900
2901void
Fred Drakeba096332000-07-09 07:04:36 +00002902PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002903{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002904 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002905 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002906 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002907 if (w == NULL || !PyString_Check(*pv)) {
2908 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002909 *pv = NULL;
2910 return;
2911 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002912 v = string_concat((PyStringObject *) *pv, w);
2913 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002914 *pv = v;
2915}
2916
Guido van Rossum013142a1994-08-30 08:19:36 +00002917void
Fred Drakeba096332000-07-09 07:04:36 +00002918PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002919{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002920 PyString_Concat(pv, w);
2921 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002922}
2923
2924
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002925/* The following function breaks the notion that strings are immutable:
2926 it changes the size of a string. We get away with this only if there
2927 is only one module referencing the object. You can also think of it
2928 as creating a new string object and destroying the old one, only
2929 more efficiently. In any case, don't use this if the string may
2930 already be known to some other part of the code... */
2931
2932int
Fred Drakeba096332000-07-09 07:04:36 +00002933_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002934{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002935 register PyObject *v;
2936 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002937 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002938 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002939 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002940 Py_DECREF(v);
2941 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002942 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002943 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002944 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002945#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002946 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002947#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002948 _Py_ForgetReference(v);
2949 *pv = (PyObject *)
Neil Schemenauer510492e2002-04-12 03:05:19 +00002950 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002951 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002952 if (*pv == NULL) {
Neil Schemenauer510492e2002-04-12 03:05:19 +00002953 PyObject_Del(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002954 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002955 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002956 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002957 _Py_NewReference(*pv);
2958 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002959 sv->ob_size = newsize;
2960 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002961 return 0;
2962}
Guido van Rossume5372401993-03-16 12:15:04 +00002963
2964/* Helpers for formatstring */
2965
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002966static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002967getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002968{
2969 int argidx = *p_argidx;
2970 if (argidx < arglen) {
2971 (*p_argidx)++;
2972 if (arglen < 0)
2973 return args;
2974 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002975 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002976 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002977 PyErr_SetString(PyExc_TypeError,
2978 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002979 return NULL;
2980}
2981
/* Format flag bits; each one corresponds to the format code character
   noted beside it. */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
2994
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002995static int
Fred Drakeba096332000-07-09 07:04:36 +00002996formatfloat(char *buf, size_t buflen, int flags,
2997 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002998{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002999 /* fmt = '%#.' + `prec` + `type`
3000 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00003001 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00003002 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003003 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003004 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003005 if (prec < 0)
3006 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00003007 if (type == 'f' && fabs(x)/1e25 >= 1e25)
3008 type = 'g';
Tim Peters885d4572001-11-28 20:27:42 +00003009 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
3010 (flags&F_ALT) ? "#" : "",
3011 prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003012 /* worst case length calc to ensure no buffer overrun:
3013 fmt = %#.<prec>g
3014 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003015 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003016 len = 1 + prec + 1 + 2 + 5 = 9 + prec
3017 If prec=0 the effective precision is 1 (the leading digit is
3018 always given), therefore increase by one to 10+prec. */
3019 if (buflen <= (size_t)10 + (size_t)prec) {
3020 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00003021 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003022 return -1;
3023 }
Tim Peters885d4572001-11-28 20:27:42 +00003024 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003025 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003026}
3027
Tim Peters38fd5b62000-09-21 05:43:11 +00003028/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3029 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3030 * Python's regular ints.
3031 * Return value: a new PyString*, or NULL if error.
3032 * . *pbuf is set to point into it,
3033 * *plen set to the # of chars following that.
3034 * Caller must decref it when done using pbuf.
3035 * The string starting at *pbuf is of the form
3036 * "-"? ("0x" | "0X")? digit+
3037 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003038 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00003039 * There will be at least prec digits, zero-filled on the left if
3040 * necessary to get that many.
3041 * val object to be converted
3042 * flags bitmask of format flags; only F_ALT is looked at
3043 * prec minimum number of digits; 0-fill on left if needed
3044 * type a character in [duoxX]; u acts the same as d
3045 *
3046 * CAUTION: o, x and X conversions on regular ints can never
3047 * produce a '-' sign, but can for Python's unbounded ints.
3048 */
3049PyObject*
3050_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
3051 char **pbuf, int *plen)
3052{
3053 PyObject *result = NULL;
3054 char *buf;
3055 int i;
3056 int sign; /* 1 if '-', else 0 */
3057 int len; /* number of characters */
3058 int numdigits; /* len == numnondigits + numdigits */
3059 int numnondigits = 0;
3060
3061 switch (type) {
3062 case 'd':
3063 case 'u':
3064 result = val->ob_type->tp_str(val);
3065 break;
3066 case 'o':
3067 result = val->ob_type->tp_as_number->nb_oct(val);
3068 break;
3069 case 'x':
3070 case 'X':
3071 numnondigits = 2;
3072 result = val->ob_type->tp_as_number->nb_hex(val);
3073 break;
3074 default:
3075 assert(!"'type' not in [duoxX]");
3076 }
3077 if (!result)
3078 return NULL;
3079
3080 /* To modify the string in-place, there can only be one reference. */
3081 if (result->ob_refcnt != 1) {
3082 PyErr_BadInternalCall();
3083 return NULL;
3084 }
3085 buf = PyString_AsString(result);
3086 len = PyString_Size(result);
3087 if (buf[len-1] == 'L') {
3088 --len;
3089 buf[len] = '\0';
3090 }
3091 sign = buf[0] == '-';
3092 numnondigits += sign;
3093 numdigits = len - numnondigits;
3094 assert(numdigits > 0);
3095
Tim Petersfff53252001-04-12 18:38:48 +00003096 /* Get rid of base marker unless F_ALT */
3097 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003098 /* Need to skip 0x, 0X or 0. */
3099 int skipped = 0;
3100 switch (type) {
3101 case 'o':
3102 assert(buf[sign] == '0');
3103 /* If 0 is only digit, leave it alone. */
3104 if (numdigits > 1) {
3105 skipped = 1;
3106 --numdigits;
3107 }
3108 break;
3109 case 'x':
3110 case 'X':
3111 assert(buf[sign] == '0');
3112 assert(buf[sign + 1] == 'x');
3113 skipped = 2;
3114 numnondigits -= 2;
3115 break;
3116 }
3117 if (skipped) {
3118 buf += skipped;
3119 len -= skipped;
3120 if (sign)
3121 buf[0] = '-';
3122 }
3123 assert(len == numnondigits + numdigits);
3124 assert(numdigits > 0);
3125 }
3126
3127 /* Fill with leading zeroes to meet minimum width. */
3128 if (prec > numdigits) {
3129 PyObject *r1 = PyString_FromStringAndSize(NULL,
3130 numnondigits + prec);
3131 char *b1;
3132 if (!r1) {
3133 Py_DECREF(result);
3134 return NULL;
3135 }
3136 b1 = PyString_AS_STRING(r1);
3137 for (i = 0; i < numnondigits; ++i)
3138 *b1++ = *buf++;
3139 for (i = 0; i < prec - numdigits; i++)
3140 *b1++ = '0';
3141 for (i = 0; i < numdigits; i++)
3142 *b1++ = *buf++;
3143 *b1 = '\0';
3144 Py_DECREF(result);
3145 result = r1;
3146 buf = PyString_AS_STRING(result);
3147 len = numnondigits + prec;
3148 }
3149
3150 /* Fix up case for hex conversions. */
3151 switch (type) {
3152 case 'x':
3153 /* Need to convert all upper case letters to lower case. */
3154 for (i = 0; i < len; i++)
3155 if (buf[i] >= 'A' && buf[i] <= 'F')
3156 buf[i] += 'a'-'A';
3157 break;
3158 case 'X':
3159 /* Need to convert 0x to 0X (and -0x to -0X). */
3160 if (buf[sign + 1] == 'x')
3161 buf[sign + 1] = 'X';
3162 break;
3163 }
3164 *pbuf = buf;
3165 *plen = len;
3166 return result;
3167}
3168
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003169static int
Fred Drakeba096332000-07-09 07:04:36 +00003170formatint(char *buf, size_t buflen, int flags,
3171 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003172{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003173 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00003174 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
3175 + 1 + 1 = 24 */
3176 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00003177 long x;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003178
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003179 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003180 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003181 if (prec < 0)
3182 prec = 1;
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003183
3184 if ((flags & F_ALT) &&
3185 (type == 'x' || type == 'X')) {
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003186 /* When converting under %#x or %#X, there are a number
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003187 * of issues that cause pain:
3188 * - when 0 is being converted, the C standard leaves off
3189 * the '0x' or '0X', which is inconsistent with other
3190 * %#x/%#X conversions and inconsistent with Python's
3191 * hex() function
3192 * - there are platforms that violate the standard and
3193 * convert 0 with the '0x' or '0X'
3194 * (Metrowerks, Compaq Tru64)
3195 * - there are platforms that give '0x' when converting
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003196 * under %#X, but convert 0 in accordance with the
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003197 * standard (OS/2 EMX)
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003198 *
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003199 * We can achieve the desired consistency by inserting our
3200 * own '0x' or '0X' prefix, and substituting %x/%X in place
3201 * of %#x/%#X.
3202 *
3203 * Note that this is the same approach as used in
3204 * formatint() in unicodeobject.c
3205 */
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003206 PyOS_snprintf(fmt, sizeof(fmt), "0%c%%.%dl%c",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003207 type, prec, type);
3208 }
3209 else {
3210 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%dl%c",
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003211 (flags&F_ALT) ? "#" : "",
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003212 prec, type);
3213 }
3214
Tim Peters38fd5b62000-09-21 05:43:11 +00003215 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Andrew MacIntyre5e9c80d2002-02-28 11:38:24 +00003216 * worst case buf = '0x' + [0-9]*prec, where prec >= 11
3217 */
Tim Peters38fd5b62000-09-21 05:43:11 +00003218 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003219 PyErr_SetString(PyExc_OverflowError,
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003220 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003221 return -1;
3222 }
Tim Peters885d4572001-11-28 20:27:42 +00003223 PyOS_snprintf(buf, buflen, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003224 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00003225}
3226
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003227static int
Fred Drakeba096332000-07-09 07:04:36 +00003228formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00003229{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003230 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003231 if (PyString_Check(v)) {
3232 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003233 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003234 }
3235 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003236 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003237 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00003238 }
3239 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003240 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00003241}
3242
Guido van Rossum013142a1994-08-30 08:19:36 +00003243
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003244/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
3245
3246 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
3247 chars are formatted. XXX This is a magic number. Each formatting
3248 routine does bounds checking to ensure no overflow, but a better
3249 solution may be to malloc a buffer of appropriate size for each
3250 format. For now, the current solution is sufficient.
3251*/
3252#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00003253
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003254PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003255PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00003256{
3257 char *fmt, *res;
3258 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00003259 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003260 PyObject *result, *orig_args;
3261#ifdef Py_USING_UNICODE
3262 PyObject *v, *w;
3263#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003264 PyObject *dict = NULL;
3265 if (format == NULL || !PyString_Check(format) || args == NULL) {
3266 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00003267 return NULL;
3268 }
Guido van Rossum90daa872000-04-10 13:47:21 +00003269 orig_args = args;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003270 fmt = PyString_AS_STRING(format);
3271 fmtcnt = PyString_GET_SIZE(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003272 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003273 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00003274 if (result == NULL)
3275 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003276 res = PyString_AsString(result);
3277 if (PyTuple_Check(args)) {
Jeremy Hylton7802a532001-12-06 15:18:48 +00003278 arglen = PyTuple_GET_SIZE(args);
Guido van Rossume5372401993-03-16 12:15:04 +00003279 argidx = 0;
3280 }
3281 else {
3282 arglen = -1;
3283 argidx = -2;
3284 }
Guido van Rossum013142a1994-08-30 08:19:36 +00003285 if (args->ob_type->tp_as_mapping)
3286 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00003287 while (--fmtcnt >= 0) {
3288 if (*fmt != '%') {
3289 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003290 rescnt = fmtcnt + 100;
3291 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003292 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003293 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003294 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003295 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00003296 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003297 }
3298 *res++ = *fmt++;
3299 }
3300 else {
3301 /* Got a format specifier */
3302 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003303 int width = -1;
3304 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00003305 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00003306 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003307 PyObject *v = NULL;
3308 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003309 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00003310 int sign;
3311 int len;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003312 char formatbuf[FORMATBUFLEN];
3313 /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003314#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003315 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003316 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003317#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003318
Guido van Rossumda9c2711996-12-05 21:58:58 +00003319 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00003320 if (*fmt == '(') {
3321 char *keystart;
3322 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003323 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00003324 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003325
3326 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003327 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003328 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00003329 goto error;
3330 }
3331 ++fmt;
3332 --fmtcnt;
3333 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00003334 /* Skip over balanced parentheses */
3335 while (pcount > 0 && --fmtcnt >= 0) {
3336 if (*fmt == ')')
3337 --pcount;
3338 else if (*fmt == '(')
3339 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003340 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003341 }
3342 keylen = fmt - keystart - 1;
3343 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003344 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003345 "incomplete format key");
3346 goto error;
3347 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003348 key = PyString_FromStringAndSize(keystart,
3349 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003350 if (key == NULL)
3351 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003352 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003353 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003354 args_owned = 0;
3355 }
3356 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003357 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003358 if (args == NULL) {
3359 goto error;
3360 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003361 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003362 arglen = -1;
3363 argidx = -2;
3364 }
Guido van Rossume5372401993-03-16 12:15:04 +00003365 while (--fmtcnt >= 0) {
3366 switch (c = *fmt++) {
3367 case '-': flags |= F_LJUST; continue;
3368 case '+': flags |= F_SIGN; continue;
3369 case ' ': flags |= F_BLANK; continue;
3370 case '#': flags |= F_ALT; continue;
3371 case '0': flags |= F_ZERO; continue;
3372 }
3373 break;
3374 }
3375 if (c == '*') {
3376 v = getnextarg(args, arglen, &argidx);
3377 if (v == NULL)
3378 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003379 if (!PyInt_Check(v)) {
3380 PyErr_SetString(PyExc_TypeError,
3381 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003382 goto error;
3383 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003384 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003385 if (width < 0) {
3386 flags |= F_LJUST;
3387 width = -width;
3388 }
Guido van Rossume5372401993-03-16 12:15:04 +00003389 if (--fmtcnt >= 0)
3390 c = *fmt++;
3391 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003392 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003393 width = c - '0';
3394 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003395 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003396 if (!isdigit(c))
3397 break;
3398 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003399 PyErr_SetString(
3400 PyExc_ValueError,
3401 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003402 goto error;
3403 }
3404 width = width*10 + (c - '0');
3405 }
3406 }
3407 if (c == '.') {
3408 prec = 0;
3409 if (--fmtcnt >= 0)
3410 c = *fmt++;
3411 if (c == '*') {
3412 v = getnextarg(args, arglen, &argidx);
3413 if (v == NULL)
3414 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003415 if (!PyInt_Check(v)) {
3416 PyErr_SetString(
3417 PyExc_TypeError,
3418 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003419 goto error;
3420 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003421 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003422 if (prec < 0)
3423 prec = 0;
3424 if (--fmtcnt >= 0)
3425 c = *fmt++;
3426 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003427 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003428 prec = c - '0';
3429 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003430 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003431 if (!isdigit(c))
3432 break;
3433 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003434 PyErr_SetString(
3435 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003436 "prec too big");
3437 goto error;
3438 }
3439 prec = prec*10 + (c - '0');
3440 }
3441 }
3442 } /* prec */
3443 if (fmtcnt >= 0) {
3444 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003445 if (--fmtcnt >= 0)
3446 c = *fmt++;
3447 }
3448 }
3449 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003450 PyErr_SetString(PyExc_ValueError,
3451 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003452 goto error;
3453 }
3454 if (c != '%') {
3455 v = getnextarg(args, arglen, &argidx);
3456 if (v == NULL)
3457 goto error;
3458 }
3459 sign = 0;
3460 fill = ' ';
3461 switch (c) {
3462 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003463 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003464 len = 1;
3465 break;
3466 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003467 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003468#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003469 if (PyUnicode_Check(v)) {
3470 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003471 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003472 goto unicode;
3473 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003474#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003475 if (c == 's')
Jeremy Hylton7802a532001-12-06 15:18:48 +00003476 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003477 else
3478 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003479 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003480 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003481 if (!PyString_Check(temp)) {
3482 PyErr_SetString(PyExc_TypeError,
3483 "%s argument has non-string str()");
Jeremy Hylton7802a532001-12-06 15:18:48 +00003484 Py_DECREF(temp);
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003485 goto error;
3486 }
Jeremy Hylton7802a532001-12-06 15:18:48 +00003487 pbuf = PyString_AS_STRING(temp);
3488 len = PyString_GET_SIZE(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003489 if (prec >= 0 && len > prec)
3490 len = prec;
3491 break;
3492 case 'i':
3493 case 'd':
3494 case 'u':
3495 case 'o':
3496 case 'x':
3497 case 'X':
3498 if (c == 'i')
3499 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003500 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003501 temp = _PyString_FormatLong(v, flags,
3502 prec, c, &pbuf, &len);
3503 if (!temp)
3504 goto error;
3505 /* unbounded ints can always produce
3506 a sign character! */
3507 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003508 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003509 else {
3510 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003511 len = formatint(pbuf,
3512 sizeof(formatbuf),
Tim Peters38fd5b62000-09-21 05:43:11 +00003513 flags, prec, c, v);
3514 if (len < 0)
3515 goto error;
3516 /* only d conversion is signed */
3517 sign = c == 'd';
3518 }
3519 if (flags & F_ZERO)
3520 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003521 break;
3522 case 'e':
3523 case 'E':
3524 case 'f':
3525 case 'g':
3526 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003527 pbuf = formatbuf;
Guido van Rossum3aa3fc42002-04-15 13:48:52 +00003528 len = formatfloat(pbuf, sizeof(formatbuf),
3529 flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003530 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003531 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003532 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003533 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003534 fill = '0';
3535 break;
3536 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003537 pbuf = formatbuf;
3538 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003539 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003540 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003541 break;
3542 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003543 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003544 "unsupported format character '%c' (0x%x) "
3545 "at index %i",
3546 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003547 goto error;
3548 }
3549 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003550 if (*pbuf == '-' || *pbuf == '+') {
3551 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003552 len--;
3553 }
3554 else if (flags & F_SIGN)
3555 sign = '+';
3556 else if (flags & F_BLANK)
3557 sign = ' ';
3558 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003559 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003560 }
3561 if (width < len)
3562 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003563 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003564 reslen -= rescnt;
3565 rescnt = width + fmtcnt + 100;
3566 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003567 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003568 return NULL;
Jeremy Hylton7802a532001-12-06 15:18:48 +00003569 res = PyString_AS_STRING(result)
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003570 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003571 }
3572 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003573 if (fill != ' ')
3574 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003575 rescnt--;
3576 if (width > len)
3577 width--;
3578 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003579 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3580 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003581 assert(pbuf[1] == c);
3582 if (fill != ' ') {
3583 *res++ = *pbuf++;
3584 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003585 }
Tim Petersfff53252001-04-12 18:38:48 +00003586 rescnt -= 2;
3587 width -= 2;
3588 if (width < 0)
3589 width = 0;
3590 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003591 }
3592 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003593 do {
3594 --rescnt;
3595 *res++ = fill;
3596 } while (--width > len);
3597 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003598 if (fill == ' ') {
3599 if (sign)
3600 *res++ = sign;
3601 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003602 (c == 'x' || c == 'X')) {
3603 assert(pbuf[0] == '0');
3604 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003605 *res++ = *pbuf++;
3606 *res++ = *pbuf++;
3607 }
3608 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003609 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003610 res += len;
3611 rescnt -= len;
3612 while (--width >= len) {
3613 --rescnt;
3614 *res++ = ' ';
3615 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003616 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003617 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003618 "not all arguments converted");
3619 goto error;
3620 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003621 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003622 } /* '%' */
3623 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003624 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003625 PyErr_SetString(PyExc_TypeError,
3626 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003627 goto error;
3628 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003629 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003630 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003631 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003632 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003633 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003634
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003635#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003636 unicode:
3637 if (args_owned) {
3638 Py_DECREF(args);
3639 args_owned = 0;
3640 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003641 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003642 if (PyTuple_Check(orig_args) && argidx > 0) {
3643 PyObject *v;
3644 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3645 v = PyTuple_New(n);
3646 if (v == NULL)
3647 goto error;
3648 while (--n >= 0) {
3649 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3650 Py_INCREF(w);
3651 PyTuple_SET_ITEM(v, n, w);
3652 }
3653 args = v;
3654 } else {
3655 Py_INCREF(orig_args);
3656 args = orig_args;
3657 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003658 args_owned = 1;
3659 /* Take what we have of the result and let the Unicode formatting
3660 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003661 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003662 if (_PyString_Resize(&result, rescnt))
3663 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003664 fmtcnt = PyString_GET_SIZE(format) - \
3665 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003666 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3667 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003668 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003669 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003670 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003671 if (v == NULL)
3672 goto error;
3673 /* Paste what we have (result) to what the Unicode formatting
3674 function returned (v) and return the result (or error) */
3675 w = PyUnicode_Concat(result, v);
3676 Py_DECREF(result);
3677 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003678 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003679 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003680#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003681
Guido van Rossume5372401993-03-16 12:15:04 +00003682 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003683 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003684 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003685 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003686 }
Guido van Rossume5372401993-03-16 12:15:04 +00003687 return NULL;
3688}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003689
3690
Guido van Rossum2a61e741997-01-18 07:55:05 +00003691
Barry Warsaw4df762f2000-08-16 23:41:01 +00003692/* This dictionary will leak at PyString_Fini() time. That's acceptable
3693 * because PyString_Fini() specifically frees interned strings that are
3694 * only referenced by this dictionary. The CVS log entry for revision 2.45
3695 * says:
3696 *
3697 * Change the Fini function to only remove otherwise unreferenced
3698 * strings from the interned table. There are references in
3699 * hard-to-find static variables all over the interpreter, and it's not
3700 * worth trying to get rid of all those; but "uninterning" isn't fair
3701 * either and may cause subtle failures later -- so we have to keep them
3702 * in the interned table.
3703 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003704static PyObject *interned;
3705
3706void
Fred Drakeba096332000-07-09 07:04:36 +00003707PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003708{
3709 register PyStringObject *s = (PyStringObject *)(*p);
3710 PyObject *t;
3711 if (s == NULL || !PyString_Check(s))
3712 Py_FatalError("PyString_InternInPlace: strings only please!");
3713 if ((t = s->ob_sinterned) != NULL) {
3714 if (t == (PyObject *)s)
3715 return;
3716 Py_INCREF(t);
3717 *p = t;
3718 Py_DECREF(s);
3719 return;
3720 }
3721 if (interned == NULL) {
3722 interned = PyDict_New();
3723 if (interned == NULL)
3724 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003725 }
3726 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3727 Py_INCREF(t);
3728 *p = s->ob_sinterned = t;
3729 Py_DECREF(s);
3730 return;
3731 }
Tim Peters111f6092001-09-12 07:54:51 +00003732 /* Ensure that only true string objects appear in the intern dict,
3733 and as the value of ob_sinterned. */
3734 if (PyString_CheckExact(s)) {
3735 t = (PyObject *)s;
3736 if (PyDict_SetItem(interned, t, t) == 0) {
3737 s->ob_sinterned = t;
3738 return;
3739 }
3740 }
3741 else {
3742 t = PyString_FromStringAndSize(PyString_AS_STRING(s),
3743 PyString_GET_SIZE(s));
3744 if (t != NULL) {
3745 if (PyDict_SetItem(interned, t, t) == 0) {
3746 *p = s->ob_sinterned = t;
3747 Py_DECREF(s);
3748 return;
3749 }
3750 Py_DECREF(t);
3751 }
Guido van Rossum2a61e741997-01-18 07:55:05 +00003752 }
3753 PyErr_Clear();
3754}
3755
3756
3757PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003758PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003759{
3760 PyObject *s = PyString_FromString(cp);
3761 if (s == NULL)
3762 return NULL;
3763 PyString_InternInPlace(&s);
3764 return s;
3765}
3766
Guido van Rossum8cf04761997-08-02 02:57:45 +00003767void
Fred Drakeba096332000-07-09 07:04:36 +00003768PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003769{
3770 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003771 for (i = 0; i < UCHAR_MAX + 1; i++) {
3772 Py_XDECREF(characters[i]);
3773 characters[i] = NULL;
3774 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003775 Py_XDECREF(nullstring);
3776 nullstring = NULL;
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003777 if (interned) {
3778 int pos, changed;
3779 PyObject *key, *value;
3780 do {
3781 changed = 0;
3782 pos = 0;
3783 while (PyDict_Next(interned, &pos, &key, &value)) {
3784 if (key->ob_refcnt == 2 && key == value) {
3785 PyDict_DelItem(interned, key);
3786 changed = 1;
3787 }
3788 }
3789 } while (changed);
3790 }
Guido van Rossum8cf04761997-08-02 02:57:45 +00003791}
Barry Warsawa903ad982001-02-23 16:40:48 +00003792
Barry Warsawa903ad982001-02-23 16:40:48 +00003793void _Py_ReleaseInternedStrings(void)
3794{
3795 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003796 fprintf(stderr, "releasing interned strings\n");
3797 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003798 Py_DECREF(interned);
3799 interned = NULL;
3800 }
3801}