blob: a8e063e794318a1e3ababaa4203184b2ab3af963 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() (formerly
   newsizedstringobject() and newstringobject()) try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   The common practice of allocating a string and then filling it in
   or changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000150PyObject *PyString_Decode(const char *s,
151 int size,
152 const char *encoding,
153 const char *errors)
154{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000155 PyObject *v, *str;
156
157 str = PyString_FromStringAndSize(s, size);
158 if (str == NULL)
159 return NULL;
160 v = PyString_AsDecodedString(str, encoding, errors);
161 Py_DECREF(str);
162 return v;
163}
164
165PyObject *PyString_AsDecodedObject(PyObject *str,
166 const char *encoding,
167 const char *errors)
168{
169 PyObject *v;
170
171 if (!PyString_Check(str)) {
172 PyErr_BadArgument();
173 goto onError;
174 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000175
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000176 if (encoding == NULL) {
177#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000178 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000179#else
180 PyErr_SetString(PyExc_ValueError, "no encoding specified");
181 goto onError;
182#endif
183 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000184
185 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000186 v = PyCodec_Decode(str, encoding, errors);
187 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000188 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000189
190 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000191
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000192 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000193 return NULL;
194}
195
196PyObject *PyString_AsDecodedString(PyObject *str,
197 const char *encoding,
198 const char *errors)
199{
200 PyObject *v;
201
202 v = PyString_AsDecodedObject(str, encoding, errors);
203 if (v == NULL)
204 goto onError;
205
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000206#ifdef Py_USING_UNICODE
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000207 /* Convert Unicode to a string using the default encoding */
208 if (PyUnicode_Check(v)) {
209 PyObject *temp = v;
210 v = PyUnicode_AsEncodedString(v, NULL, NULL);
211 Py_DECREF(temp);
212 if (v == NULL)
213 goto onError;
214 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000215#endif
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000216 if (!PyString_Check(v)) {
217 PyErr_Format(PyExc_TypeError,
218 "decoder did not return a string object (type=%.400s)",
219 v->ob_type->tp_name);
220 Py_DECREF(v);
221 goto onError;
222 }
223
224 return v;
225
226 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000227 return NULL;
228}
229
230PyObject *PyString_Encode(const char *s,
231 int size,
232 const char *encoding,
233 const char *errors)
234{
235 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000236
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000237 str = PyString_FromStringAndSize(s, size);
238 if (str == NULL)
239 return NULL;
240 v = PyString_AsEncodedString(str, encoding, errors);
241 Py_DECREF(str);
242 return v;
243}
244
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000245PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000246 const char *encoding,
247 const char *errors)
248{
249 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000250
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000251 if (!PyString_Check(str)) {
252 PyErr_BadArgument();
253 goto onError;
254 }
255
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000256 if (encoding == NULL) {
257#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000258 encoding = PyUnicode_GetDefaultEncoding();
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000259#else
260 PyErr_SetString(PyExc_ValueError, "no encoding specified");
261 goto onError;
262#endif
263 }
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000264
265 /* Encode via the codec registry */
266 v = PyCodec_Encode(str, encoding, errors);
267 if (v == NULL)
268 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000269
270 return v;
271
272 onError:
273 return NULL;
274}
275
276PyObject *PyString_AsEncodedString(PyObject *str,
277 const char *encoding,
278 const char *errors)
279{
280 PyObject *v;
281
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000282 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000283 if (v == NULL)
284 goto onError;
285
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000286#ifdef Py_USING_UNICODE
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000287 /* Convert Unicode to a string using the default encoding */
288 if (PyUnicode_Check(v)) {
289 PyObject *temp = v;
290 v = PyUnicode_AsEncodedString(v, NULL, NULL);
291 Py_DECREF(temp);
292 if (v == NULL)
293 goto onError;
294 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000295#endif
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000296 if (!PyString_Check(v)) {
297 PyErr_Format(PyExc_TypeError,
298 "encoder did not return a string object (type=%.400s)",
299 v->ob_type->tp_name);
300 Py_DECREF(v);
301 goto onError;
302 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000303
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000304 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000305
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000306 onError:
307 return NULL;
308}
309
Guido van Rossum234f9421993-06-17 12:35:49 +0000310static void
Fred Drakeba096332000-07-09 07:04:36 +0000311string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000312{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000313 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000314}
315
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000316static int
317string_getsize(register PyObject *op)
318{
319 char *s;
320 int len;
321 if (PyString_AsStringAndSize(op, &s, &len))
322 return -1;
323 return len;
324}
325
326static /*const*/ char *
327string_getbuffer(register PyObject *op)
328{
329 char *s;
330 int len;
331 if (PyString_AsStringAndSize(op, &s, &len))
332 return NULL;
333 return s;
334}
335
Guido van Rossumd7047b31995-01-02 19:07:15 +0000336int
Fred Drakeba096332000-07-09 07:04:36 +0000337PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000339 if (!PyString_Check(op))
340 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000341 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000342}
343
344/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000345PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000346{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000347 if (!PyString_Check(op))
348 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000349 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000350}
351
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000352int
353PyString_AsStringAndSize(register PyObject *obj,
354 register char **s,
355 register int *len)
356{
357 if (s == NULL) {
358 PyErr_BadInternalCall();
359 return -1;
360 }
361
362 if (!PyString_Check(obj)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000363#ifdef Py_USING_UNICODE
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000364 if (PyUnicode_Check(obj)) {
365 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
366 if (obj == NULL)
367 return -1;
368 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000369 else
370#endif
371 {
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000372 PyErr_Format(PyExc_TypeError,
373 "expected string or Unicode object, "
374 "%.200s found", obj->ob_type->tp_name);
375 return -1;
376 }
377 }
378
379 *s = PyString_AS_STRING(obj);
380 if (len != NULL)
381 *len = PyString_GET_SIZE(obj);
382 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
383 PyErr_SetString(PyExc_TypeError,
384 "expected string without null bytes");
385 return -1;
386 }
387 return 0;
388}
389
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000390/* Methods */
391
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000392static int
Fred Drakeba096332000-07-09 07:04:36 +0000393string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000394{
395 int i;
396 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000397 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000398 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000399 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000400 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000401 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000403
Thomas Wouters7e474022000-07-16 12:04:32 +0000404 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000405 quote = '\'';
406 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
407 quote = '"';
408
409 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 for (i = 0; i < op->ob_size; i++) {
411 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000412 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000414 else if (c == '\t')
415 fprintf(fp, "\\t");
416 else if (c == '\n')
417 fprintf(fp, "\\n");
418 else if (c == '\r')
419 fprintf(fp, "\\r");
420 else if (c < ' ' || c >= 0x7f)
421 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000422 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000423 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000424 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000425 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000426 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427}
428
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000429static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000430string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000431{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000432 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
433 PyObject *v;
434 if (newsize > INT_MAX) {
435 PyErr_SetString(PyExc_OverflowError,
436 "string is too large to make repr");
437 }
438 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000440 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000441 }
442 else {
443 register int i;
444 register char c;
445 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000446 int quote;
447
Thomas Wouters7e474022000-07-16 12:04:32 +0000448 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000449 quote = '\'';
450 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
451 quote = '"';
452
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000453 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000454 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000455 for (i = 0; i < op->ob_size; i++) {
456 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000457 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000459 else if (c == '\t')
460 *p++ = '\\', *p++ = 't';
461 else if (c == '\n')
462 *p++ = '\\', *p++ = 'n';
463 else if (c == '\r')
464 *p++ = '\\', *p++ = 'r';
465 else if (c < ' ' || c >= 0x7f) {
466 sprintf(p, "\\x%02x", c & 0xff);
467 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000468 }
469 else
470 *p++ = c;
471 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000472 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474 _PyString_Resize(
475 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000476 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000477 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000478}
479
Guido van Rossum189f1df2001-05-01 16:51:53 +0000480static PyObject *
481string_str(PyObject *s)
482{
483 Py_INCREF(s);
484 return s;
485}
486
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000487static int
Fred Drakeba096332000-07-09 07:04:36 +0000488string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000489{
490 return a->ob_size;
491}
492
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000493static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000494string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000495{
496 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000497 register PyStringObject *op;
498 if (!PyString_Check(bb)) {
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000499#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000500 if (PyUnicode_Check(bb))
501 return PyUnicode_Concat((PyObject *)a, bb);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000502#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000503 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000504 "cannot add type \"%.200s\" to string",
505 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000506 return NULL;
507 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000508#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000509 /* Optimize cases with empty left or right operand */
510 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000511 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000512 return bb;
513 }
514 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000515 Py_INCREF(a);
516 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000517 }
518 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000519 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000520 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000521 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000522 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000523 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000524 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000525#ifdef CACHE_HASH
526 op->ob_shash = -1;
527#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000528#ifdef INTERN_STRINGS
529 op->ob_sinterned = NULL;
530#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000531 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
532 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
533 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000534 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000535#undef b
536}
537
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000538static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000539string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540{
541 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000542 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000543 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000544 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000545 if (n < 0)
546 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000547 /* watch out for overflows: the size can overflow int,
548 * and the # of bytes needed can overflow size_t
549 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000550 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000551 if (n && size / n != a->ob_size) {
552 PyErr_SetString(PyExc_OverflowError,
553 "repeated string is too long");
554 return NULL;
555 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000557 Py_INCREF(a);
558 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000559 }
Tim Peters8f422462000-09-09 06:13:41 +0000560 nbytes = size * sizeof(char);
561 if (nbytes / sizeof(char) != (size_t)size ||
562 nbytes + sizeof(PyStringObject) <= nbytes) {
563 PyErr_SetString(PyExc_OverflowError,
564 "repeated string is too long");
565 return NULL;
566 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000567 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000568 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000569 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000570 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000571 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000572#ifdef CACHE_HASH
573 op->ob_shash = -1;
574#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000575#ifdef INTERN_STRINGS
576 op->ob_sinterned = NULL;
577#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000578 for (i = 0; i < size; i += a->ob_size)
579 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
580 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000581 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000582}
583
584/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
585
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000587string_slice(register PyStringObject *a, register int i, register int j)
588 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000589{
590 if (i < 0)
591 i = 0;
592 if (j < 0)
593 j = 0; /* Avoid signed/unsigned bug in next line */
594 if (j > a->ob_size)
595 j = a->ob_size;
596 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000597 Py_INCREF(a);
598 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000599 }
600 if (j < i)
601 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000602 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000603}
604
Guido van Rossum9284a572000-03-07 15:53:43 +0000605static int
Fred Drakeba096332000-07-09 07:04:36 +0000606string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000607{
608 register char *s, *end;
609 register char c;
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000610#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +0000611 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000612 return PyUnicode_Contains(a, el);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000613#endif
Guido van Rossum90daa872000-04-10 13:47:21 +0000614 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000615 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000616 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000617 return -1;
618 }
619 c = PyString_AsString(el)[0];
620 s = PyString_AsString(a);
621 end = s + PyString_Size(a);
622 while (s < end) {
623 if (c == *s++)
624 return 1;
625 }
626 return 0;
627}
628
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000629static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000630string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000631{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000632 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000633 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000634 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000635 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000636 return NULL;
637 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000638 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000639 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000640 if (v == NULL)
641 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000642 else {
643#ifdef COUNT_ALLOCS
644 one_strings++;
645#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000646 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000647 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000648 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000649}
650
Martin v. Löwiscd353062001-05-24 16:56:35 +0000651static PyObject*
652string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000653{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000654 int c;
655 int len_a, len_b;
656 int min_len;
657 PyObject *result;
658
659 /* One of the objects is a string object. Make sure the
660 other one is one, too. */
661 if (a->ob_type != b->ob_type) {
662 result = Py_NotImplemented;
663 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000664 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000665 if (a == b) {
666 switch (op) {
667 case Py_EQ:case Py_LE:case Py_GE:
668 result = Py_True;
669 goto out;
670 case Py_NE:case Py_LT:case Py_GT:
671 result = Py_False;
672 goto out;
673 }
674 }
675 if (op == Py_EQ) {
676 /* Supporting Py_NE here as well does not save
677 much time, since Py_NE is rarely used. */
678 if (a->ob_size == b->ob_size
679 && (a->ob_sval[0] == b->ob_sval[0]
680 && memcmp(a->ob_sval, b->ob_sval,
681 a->ob_size) == 0)) {
682 result = Py_True;
683 } else {
684 result = Py_False;
685 }
686 goto out;
687 }
688 len_a = a->ob_size; len_b = b->ob_size;
689 min_len = (len_a < len_b) ? len_a : len_b;
690 if (min_len > 0) {
691 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
692 if (c==0)
693 c = memcmp(a->ob_sval, b->ob_sval, min_len);
694 }else
695 c = 0;
696 if (c == 0)
697 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
698 switch (op) {
699 case Py_LT: c = c < 0; break;
700 case Py_LE: c = c <= 0; break;
701 case Py_EQ: assert(0); break; /* unreachable */
702 case Py_NE: c = c != 0; break;
703 case Py_GT: c = c > 0; break;
704 case Py_GE: c = c >= 0; break;
705 default:
706 result = Py_NotImplemented;
707 goto out;
708 }
709 result = c ? Py_True : Py_False;
710 out:
711 Py_INCREF(result);
712 return result;
713}
714
715int
716_PyString_Eq(PyObject *o1, PyObject *o2)
717{
718 PyStringObject *a, *b;
719 a = (PyStringObject*)o1;
720 b = (PyStringObject*)o2;
721 return a->ob_size == b->ob_size
722 && *a->ob_sval == *b->ob_sval
723 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000724}
725
Guido van Rossum9bfef441993-03-29 10:43:31 +0000726static long
Fred Drakeba096332000-07-09 07:04:36 +0000727string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000728{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000729 register int len;
730 register unsigned char *p;
731 register long x;
732
733#ifdef CACHE_HASH
734 if (a->ob_shash != -1)
735 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000736#ifdef INTERN_STRINGS
737 if (a->ob_sinterned != NULL)
738 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000739 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000740#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000741#endif
742 len = a->ob_size;
743 p = (unsigned char *) a->ob_sval;
744 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000745 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000746 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000747 x ^= a->ob_size;
748 if (x == -1)
749 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000750#ifdef CACHE_HASH
751 a->ob_shash = x;
752#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000753 return x;
754}
755
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000756static int
Fred Drakeba096332000-07-09 07:04:36 +0000757string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000758{
759 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000760 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000761 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000762 return -1;
763 }
764 *ptr = (void *)self->ob_sval;
765 return self->ob_size;
766}
767
/* Write-buffer slot.  No writable buffer is ever handed out: this always
   sets TypeError and returns -1, leaving *ptr untouched. */
static int
string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
{
    PyErr_SetString(PyExc_TypeError,
                    "Cannot use string as modifiable buffer");
    return -1;
}
775
/* Segment-count slot.  Always reports a single segment; when lenp is
   non-NULL the string length is stored through it. */
static int
string_buffer_getsegcount(PyStringObject *self, int *lenp)
{
    if ( lenp )
        *lenp = self->ob_size;
    return 1;
}
783
Guido van Rossum1db70701998-10-08 02:18:52 +0000784static int
Fred Drakeba096332000-07-09 07:04:36 +0000785string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000786{
787 if ( index != 0 ) {
788 PyErr_SetString(PyExc_SystemError,
789 "accessing non-existent string segment");
790 return -1;
791 }
792 *ptr = self->ob_sval;
793 return self->ob_size;
794}
795
/* Sequence-protocol slot table for the string type.  Both assignment
   slots are 0: no item or slice assignment handler is installed. */
static PySequenceMethods string_as_sequence = {
    (inquiry)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (intargfunc)string_repeat, /*sq_repeat*/
    (intargfunc)string_item, /*sq_item*/
    (intintargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
806
/* Buffer-protocol slot table for the string type: read buffer, write
   buffer (always refuses), segment count, and character buffer. */
static PyBufferProcs string_as_buffer = {
    (getreadbufferproc)string_buffer_getreadbuf,
    (getwritebufferproc)string_buffer_getwritebuf,
    (getsegcountproc)string_buffer_getsegcount,
    (getcharbufferproc)string_buffer_getcharbuf,
};
813
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000814
815
/* Mode selectors for do_strip()'s striptype argument: strip leading
   whitespace only, trailing whitespace only, or both ends. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
819
820
821static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000822split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000823{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000824 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 PyObject* item;
826 PyObject *list = PyList_New(0);
827
828 if (list == NULL)
829 return NULL;
830
Guido van Rossum4c08d552000-03-10 22:55:18 +0000831 for (i = j = 0; i < len; ) {
832 while (i < len && isspace(Py_CHARMASK(s[i])))
833 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000834 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000835 while (i < len && !isspace(Py_CHARMASK(s[i])))
836 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000837 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000838 if (maxsplit-- <= 0)
839 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000840 item = PyString_FromStringAndSize(s+j, (int)(i-j));
841 if (item == NULL)
842 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000843 err = PyList_Append(list, item);
844 Py_DECREF(item);
845 if (err < 0)
846 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000847 while (i < len && isspace(Py_CHARMASK(s[i])))
848 i++;
849 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000850 }
851 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000852 if (j < len) {
853 item = PyString_FromStringAndSize(s+j, (int)(len - j));
854 if (item == NULL)
855 goto finally;
856 err = PyList_Append(list, item);
857 Py_DECREF(item);
858 if (err < 0)
859 goto finally;
860 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000861 return list;
862 finally:
863 Py_DECREF(list);
864 return NULL;
865}
866
867
868static char split__doc__[] =
869"S.split([sep [,maxsplit]]) -> list of strings\n\
870\n\
871Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000872delimiter string. If maxsplit is given, at most maxsplit\n\
873splits are done. If sep is not specified, any whitespace string\n\
874is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875
876static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000877string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000878{
879 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000880 int maxsplit = -1;
881 const char *s = PyString_AS_STRING(self), *sub;
882 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000883
Guido van Rossum4c08d552000-03-10 22:55:18 +0000884 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000885 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000886 if (maxsplit < 0)
887 maxsplit = INT_MAX;
888 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000890 if (PyString_Check(subobj)) {
891 sub = PyString_AS_STRING(subobj);
892 n = PyString_GET_SIZE(subobj);
893 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000894#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +0000895 else if (PyUnicode_Check(subobj))
896 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000897#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +0000898 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
899 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000900 if (n == 0) {
901 PyErr_SetString(PyExc_ValueError, "empty separator");
902 return NULL;
903 }
904
905 list = PyList_New(0);
906 if (list == NULL)
907 return NULL;
908
909 i = j = 0;
910 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000911 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000912 if (maxsplit-- <= 0)
913 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000914 item = PyString_FromStringAndSize(s+j, (int)(i-j));
915 if (item == NULL)
916 goto fail;
917 err = PyList_Append(list, item);
918 Py_DECREF(item);
919 if (err < 0)
920 goto fail;
921 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000922 }
923 else
924 i++;
925 }
926 item = PyString_FromStringAndSize(s+j, (int)(len-j));
927 if (item == NULL)
928 goto fail;
929 err = PyList_Append(list, item);
930 Py_DECREF(item);
931 if (err < 0)
932 goto fail;
933
934 return list;
935
936 fail:
937 Py_DECREF(list);
938 return NULL;
939}
940
941
942static char join__doc__[] =
943"S.join(sequence) -> string\n\
944\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000945Return a string which is the concatenation of the strings in the\n\
946sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000947
948static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000949string_join(PyStringObject *self, PyObject *orig)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000950{
951 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000952 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000953 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000954 char *p;
955 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000956 size_t sz = 0;
957 int i;
Martin v. Löwise3eb1f22001-08-16 13:15:00 +0000958 PyObject *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000959
Tim Peters19fe14e2001-01-19 03:03:47 +0000960 seq = PySequence_Fast(orig, "");
961 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000962 if (PyErr_ExceptionMatches(PyExc_TypeError))
963 PyErr_Format(PyExc_TypeError,
964 "sequence expected, %.80s found",
965 orig->ob_type->tp_name);
966 return NULL;
967 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000968
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000969 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000970 if (seqlen == 0) {
971 Py_DECREF(seq);
972 return PyString_FromString("");
973 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000974 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000975 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000976 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
977 PyErr_Format(PyExc_TypeError,
978 "sequence item 0: expected string,"
979 " %.80s found",
980 item->ob_type->tp_name);
981 Py_DECREF(seq);
982 return NULL;
983 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000984 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000985 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000986 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000987 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000988
Tim Peters19fe14e2001-01-19 03:03:47 +0000989 /* There are at least two things to join. Do a pre-pass to figure out
990 * the total amount of space we'll need (sz), see whether any argument
991 * is absurd, and defer to the Unicode join if appropriate.
992 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000993 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000994 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000995 item = PySequence_Fast_GET_ITEM(seq, i);
996 if (!PyString_Check(item)){
Martin v. Löwis339d0f72001-08-17 18:39:25 +0000997#ifdef Py_USING_UNICODE
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000998 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000999 /* Defer to Unicode join.
1000 * CAUTION: There's no gurantee that the
1001 * original sequence can be iterated over
1002 * again, so we must pass seq here.
1003 */
1004 PyObject *result;
1005 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +00001006 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +00001007 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001008 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001009#endif
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001010 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +00001011 "sequence item %i: expected string,"
1012 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001013 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +00001014 Py_DECREF(seq);
1015 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001016 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001017 sz += PyString_GET_SIZE(item);
1018 if (i != 0)
1019 sz += seplen;
1020 if (sz < old_sz || sz > INT_MAX) {
1021 PyErr_SetString(PyExc_OverflowError,
1022 "join() is too long for a Python string");
1023 Py_DECREF(seq);
1024 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001025 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001026 }
1027
1028 /* Allocate result space. */
1029 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1030 if (res == NULL) {
1031 Py_DECREF(seq);
1032 return NULL;
1033 }
1034
1035 /* Catenate everything. */
1036 p = PyString_AS_STRING(res);
1037 for (i = 0; i < seqlen; ++i) {
1038 size_t n;
1039 item = PySequence_Fast_GET_ITEM(seq, i);
1040 n = PyString_GET_SIZE(item);
1041 memcpy(p, PyString_AS_STRING(item), n);
1042 p += n;
1043 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001044 memcpy(p, sep, seplen);
1045 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001046 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001047 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001048
Jeremy Hylton49048292000-07-11 03:28:17 +00001049 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001051}
1052
/* Exported wrapper around string_join().  sep must be a non-NULL string
   object and x must be non-NULL; both are checked only by assert(). */
PyObject *
_PyString_Join(PyObject *sep, PyObject *x)
{
    assert(sep != NULL && PyString_Check(sep));
    assert(x != NULL);
    return string_join((PyStringObject *)sep, x);
}
1060
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001061static long
Fred Drakeba096332000-07-09 07:04:36 +00001062string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001063{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001064 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065 int len = PyString_GET_SIZE(self);
1066 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001067 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001068
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001069 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001070 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001071 return -2;
1072 if (PyString_Check(subobj)) {
1073 sub = PyString_AS_STRING(subobj);
1074 n = PyString_GET_SIZE(subobj);
1075 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001076#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001077 else if (PyUnicode_Check(subobj))
1078 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001079#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001080 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001081 return -2;
1082
1083 if (last > len)
1084 last = len;
1085 if (last < 0)
1086 last += len;
1087 if (last < 0)
1088 last = 0;
1089 if (i < 0)
1090 i += len;
1091 if (i < 0)
1092 i = 0;
1093
Guido van Rossum4c08d552000-03-10 22:55:18 +00001094 if (dir > 0) {
1095 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001096 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001097 last -= n;
1098 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001099 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001100 return (long)i;
1101 }
1102 else {
1103 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001104
Guido van Rossum4c08d552000-03-10 22:55:18 +00001105 if (n == 0 && i <= last)
1106 return (long)last;
1107 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001108 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001109 return (long)j;
1110 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001111
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001112 return -1;
1113}
1114
1115
/* Docstring for S.find(); the search range is written in slice notation,
   matching the wording of count__doc__. */
static char find__doc__[] =
"S.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1124
/* Implementation of S.find(): forward search via string_find_internal().
   Its -2 result is mapped to NULL; any other result, including -1 for
   "not found", is returned as a Python int. */
static PyObject *
string_find(PyStringObject *self, PyObject *args)
{
    long result = string_find_internal(self, args, +1);
    if (result == -2)
        return NULL;
    return PyInt_FromLong(result);
}
1133
1134
1135static char index__doc__[] =
1136"S.index(sub [,start [,end]]) -> int\n\
1137\n\
1138Like S.find() but raise ValueError when the substring is not found.";
1139
1140static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001141string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001142{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001143 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001144 if (result == -2)
1145 return NULL;
1146 if (result == -1) {
1147 PyErr_SetString(PyExc_ValueError,
1148 "substring not found in string.index");
1149 return NULL;
1150 }
1151 return PyInt_FromLong(result);
1152}
1153
1154
/* Docstring for S.rfind(); the search range is written in slice notation,
   matching the wording of count__doc__. */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1163
/* Implementation of S.rfind(): backward search via string_find_internal().
   Its -2 result is mapped to NULL; any other result, including -1 for
   "not found", is returned as a Python int. */
static PyObject *
string_rfind(PyStringObject *self, PyObject *args)
{
    long result = string_find_internal(self, args, -1);
    if (result == -2)
        return NULL;
    return PyInt_FromLong(result);
}
1172
1173
1174static char rindex__doc__[] =
1175"S.rindex(sub [,start [,end]]) -> int\n\
1176\n\
1177Like S.rfind() but raise ValueError when the substring is not found.";
1178
1179static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001180string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001181{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001182 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001183 if (result == -2)
1184 return NULL;
1185 if (result == -1) {
1186 PyErr_SetString(PyExc_ValueError,
1187 "substring not found in string.rindex");
1188 return NULL;
1189 }
1190 return PyInt_FromLong(result);
1191}
1192
1193
1194static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001195do_strip(PyStringObject *self, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001196{
1197 char *s = PyString_AS_STRING(self);
1198 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001199
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001200 i = 0;
1201 if (striptype != RIGHTSTRIP) {
1202 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1203 i++;
1204 }
1205 }
1206
1207 j = len;
1208 if (striptype != LEFTSTRIP) {
1209 do {
1210 j--;
1211 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1212 j++;
1213 }
1214
1215 if (i == 0 && j == len) {
1216 Py_INCREF(self);
1217 return (PyObject*)self;
1218 }
1219 else
1220 return PyString_FromStringAndSize(s+i, j-i);
1221}
1222
1223
static char strip__doc__[] =
"S.strip() -> string\n\
\n\
Return a copy of the string S with leading and trailing\n\
whitespace removed.";

/* Implementation of S.strip(): do_strip() applied to both ends. */
static PyObject *
string_strip(PyStringObject *self)
{
    return do_strip(self, BOTHSTRIP);
}
1235
1236
static char lstrip__doc__[] =
"S.lstrip() -> string\n\
\n\
Return a copy of the string S with leading whitespace removed.";

/* Implementation of S.lstrip(): do_strip() applied to the left end only. */
static PyObject *
string_lstrip(PyStringObject *self)
{
    return do_strip(self, LEFTSTRIP);
}
1247
1248
static char rstrip__doc__[] =
"S.rstrip() -> string\n\
\n\
Return a copy of the string S with trailing whitespace removed.";

/* Implementation of S.rstrip(): do_strip() applied to the right end only. */
static PyObject *
string_rstrip(PyStringObject *self)
{
    return do_strip(self, RIGHTSTRIP);
}
1259
1260
1261static char lower__doc__[] =
1262"S.lower() -> string\n\
1263\n\
1264Return a copy of the string S converted to lowercase.";
1265
1266static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001267string_lower(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001268{
1269 char *s = PyString_AS_STRING(self), *s_new;
1270 int i, n = PyString_GET_SIZE(self);
1271 PyObject *new;
1272
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001273 new = PyString_FromStringAndSize(NULL, n);
1274 if (new == NULL)
1275 return NULL;
1276 s_new = PyString_AsString(new);
1277 for (i = 0; i < n; i++) {
1278 int c = Py_CHARMASK(*s++);
1279 if (isupper(c)) {
1280 *s_new = tolower(c);
1281 } else
1282 *s_new = c;
1283 s_new++;
1284 }
1285 return new;
1286}
1287
1288
1289static char upper__doc__[] =
1290"S.upper() -> string\n\
1291\n\
1292Return a copy of the string S converted to uppercase.";
1293
1294static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001295string_upper(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001296{
1297 char *s = PyString_AS_STRING(self), *s_new;
1298 int i, n = PyString_GET_SIZE(self);
1299 PyObject *new;
1300
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001301 new = PyString_FromStringAndSize(NULL, n);
1302 if (new == NULL)
1303 return NULL;
1304 s_new = PyString_AsString(new);
1305 for (i = 0; i < n; i++) {
1306 int c = Py_CHARMASK(*s++);
1307 if (islower(c)) {
1308 *s_new = toupper(c);
1309 } else
1310 *s_new = c;
1311 s_new++;
1312 }
1313 return new;
1314}
1315
1316
Guido van Rossum4c08d552000-03-10 22:55:18 +00001317static char title__doc__[] =
1318"S.title() -> string\n\
1319\n\
1320Return a titlecased version of S, i.e. words start with uppercase\n\
1321characters, all remaining cased characters have lowercase.";
1322
1323static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001324string_title(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001325{
1326 char *s = PyString_AS_STRING(self), *s_new;
1327 int i, n = PyString_GET_SIZE(self);
1328 int previous_is_cased = 0;
1329 PyObject *new;
1330
Guido van Rossum4c08d552000-03-10 22:55:18 +00001331 new = PyString_FromStringAndSize(NULL, n);
1332 if (new == NULL)
1333 return NULL;
1334 s_new = PyString_AsString(new);
1335 for (i = 0; i < n; i++) {
1336 int c = Py_CHARMASK(*s++);
1337 if (islower(c)) {
1338 if (!previous_is_cased)
1339 c = toupper(c);
1340 previous_is_cased = 1;
1341 } else if (isupper(c)) {
1342 if (previous_is_cased)
1343 c = tolower(c);
1344 previous_is_cased = 1;
1345 } else
1346 previous_is_cased = 0;
1347 *s_new++ = c;
1348 }
1349 return new;
1350}
1351
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001352static char capitalize__doc__[] =
1353"S.capitalize() -> string\n\
1354\n\
1355Return a copy of the string S with only its first character\n\
1356capitalized.";
1357
1358static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001359string_capitalize(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001360{
1361 char *s = PyString_AS_STRING(self), *s_new;
1362 int i, n = PyString_GET_SIZE(self);
1363 PyObject *new;
1364
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001365 new = PyString_FromStringAndSize(NULL, n);
1366 if (new == NULL)
1367 return NULL;
1368 s_new = PyString_AsString(new);
1369 if (0 < n) {
1370 int c = Py_CHARMASK(*s++);
1371 if (islower(c))
1372 *s_new = toupper(c);
1373 else
1374 *s_new = c;
1375 s_new++;
1376 }
1377 for (i = 1; i < n; i++) {
1378 int c = Py_CHARMASK(*s++);
1379 if (isupper(c))
1380 *s_new = tolower(c);
1381 else
1382 *s_new = c;
1383 s_new++;
1384 }
1385 return new;
1386}
1387
1388
1389static char count__doc__[] =
1390"S.count(sub[, start[, end]]) -> int\n\
1391\n\
1392Return the number of occurrences of substring sub in string\n\
1393S[start:end]. Optional arguments start and end are\n\
1394interpreted as in slice notation.";
1395
1396static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001397string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001399 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 int len = PyString_GET_SIZE(self), n;
1401 int i = 0, last = INT_MAX;
1402 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001403 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404
Guido van Rossumc6821402000-05-08 14:08:05 +00001405 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1406 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001408
Guido van Rossum4c08d552000-03-10 22:55:18 +00001409 if (PyString_Check(subobj)) {
1410 sub = PyString_AS_STRING(subobj);
1411 n = PyString_GET_SIZE(subobj);
1412 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001413#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001414 else if (PyUnicode_Check(subobj)) {
1415 int count;
1416 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1417 if (count == -1)
1418 return NULL;
1419 else
1420 return PyInt_FromLong((long) count);
1421 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001422#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001423 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1424 return NULL;
1425
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001426 if (last > len)
1427 last = len;
1428 if (last < 0)
1429 last += len;
1430 if (last < 0)
1431 last = 0;
1432 if (i < 0)
1433 i += len;
1434 if (i < 0)
1435 i = 0;
1436 m = last + 1 - n;
1437 if (n == 0)
1438 return PyInt_FromLong((long) (m-i));
1439
1440 r = 0;
1441 while (i < m) {
1442 if (!memcmp(s+i, sub, n)) {
1443 r++;
1444 i += n;
1445 } else {
1446 i++;
1447 }
1448 }
1449 return PyInt_FromLong((long) r);
1450}
1451
1452
1453static char swapcase__doc__[] =
1454"S.swapcase() -> string\n\
1455\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001456Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001457converted to lowercase and vice versa.";
1458
1459static PyObject *
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00001460string_swapcase(PyStringObject *self)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001461{
1462 char *s = PyString_AS_STRING(self), *s_new;
1463 int i, n = PyString_GET_SIZE(self);
1464 PyObject *new;
1465
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001466 new = PyString_FromStringAndSize(NULL, n);
1467 if (new == NULL)
1468 return NULL;
1469 s_new = PyString_AsString(new);
1470 for (i = 0; i < n; i++) {
1471 int c = Py_CHARMASK(*s++);
1472 if (islower(c)) {
1473 *s_new = toupper(c);
1474 }
1475 else if (isupper(c)) {
1476 *s_new = tolower(c);
1477 }
1478 else
1479 *s_new = c;
1480 s_new++;
1481 }
1482 return new;
1483}
1484
1485
1486static char translate__doc__[] =
1487"S.translate(table [,deletechars]) -> string\n\
1488\n\
1489Return a copy of the string S, where all characters occurring\n\
1490in the optional argument deletechars are removed, and the\n\
1491remaining characters have been mapped through the given\n\
1492translation table, which must be a string of length 256.";
1493
1494static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001495string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001497 register char *input, *output;
1498 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 register int i, c, changed = 0;
1500 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001501 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001502 int inlen, tablen, dellen = 0;
1503 PyObject *result;
1504 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001505 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506
Guido van Rossum4c08d552000-03-10 22:55:18 +00001507 if (!PyArg_ParseTuple(args, "O|O:translate",
1508 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510
1511 if (PyString_Check(tableobj)) {
1512 table1 = PyString_AS_STRING(tableobj);
1513 tablen = PyString_GET_SIZE(tableobj);
1514 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001515#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001516 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001517 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001518 parameter; instead a mapping to None will cause characters
1519 to be deleted. */
1520 if (delobj != NULL) {
1521 PyErr_SetString(PyExc_TypeError,
1522 "deletions are implemented differently for unicode");
1523 return NULL;
1524 }
1525 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1526 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001527#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001528 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001529 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001530
1531 if (delobj != NULL) {
1532 if (PyString_Check(delobj)) {
1533 del_table = PyString_AS_STRING(delobj);
1534 dellen = PyString_GET_SIZE(delobj);
1535 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001536#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001537 else if (PyUnicode_Check(delobj)) {
1538 PyErr_SetString(PyExc_TypeError,
1539 "deletions are implemented differently for unicode");
1540 return NULL;
1541 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001542#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001543 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1544 return NULL;
1545
1546 if (tablen != 256) {
1547 PyErr_SetString(PyExc_ValueError,
1548 "translation table must be 256 characters long");
1549 return NULL;
1550 }
1551 }
1552 else {
1553 del_table = NULL;
1554 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001555 }
1556
1557 table = table1;
1558 inlen = PyString_Size(input_obj);
1559 result = PyString_FromStringAndSize((char *)NULL, inlen);
1560 if (result == NULL)
1561 return NULL;
1562 output_start = output = PyString_AsString(result);
1563 input = PyString_AsString(input_obj);
1564
1565 if (dellen == 0) {
1566 /* If no deletions are required, use faster code */
1567 for (i = inlen; --i >= 0; ) {
1568 c = Py_CHARMASK(*input++);
1569 if (Py_CHARMASK((*output++ = table[c])) != c)
1570 changed = 1;
1571 }
1572 if (changed)
1573 return result;
1574 Py_DECREF(result);
1575 Py_INCREF(input_obj);
1576 return input_obj;
1577 }
1578
1579 for (i = 0; i < 256; i++)
1580 trans_table[i] = Py_CHARMASK(table[i]);
1581
1582 for (i = 0; i < dellen; i++)
1583 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1584
1585 for (i = inlen; --i >= 0; ) {
1586 c = Py_CHARMASK(*input++);
1587 if (trans_table[c] != -1)
1588 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1589 continue;
1590 changed = 1;
1591 }
1592 if (!changed) {
1593 Py_DECREF(result);
1594 Py_INCREF(input_obj);
1595 return input_obj;
1596 }
1597 /* Fix the size of the resulting string */
1598 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1599 return NULL;
1600 return result;
1601}
1602
1603
1604/* What follows is used for implementing replace(). Perry Stoll. */
1605
/*
  mymemfind

  A strstr() work-alike for arbitrary (not NUL-terminated) memory.

  Scans the LEN bytes at MEM for the first position at which the
  PAT_LEN bytes at PAT occur.  Returns that index into MEM, or -1
  when there is no match.  A pattern longer than the memory block
  can never match, so that case also yields -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* a match cannot begin within the final pat_len-1 bytes */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* cheap first-byte test before the full comparison */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1631
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning
  left to right and resuming just past each match.  For example
  mem=1111 with pat=11 gives 2, and mem=11111 with pat=11 also
  gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits;

	for (hits = 0; len >= 0; hits++) {
		const int at = mymemfind(mem, len, pat, pat_len);

		if (at == -1)
			break;
		/* continue the search immediately after this match */
		mem += at + pat_len;
		len -= at + pat_len;
	}
	return hits;
}
1655
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "replace all".

   If STR is empty, if PAT is longer than STR, or if there is nothing
   to replace (no occurrences of PAT, or COUNT is 0), the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   PAT_LEN is expected to be > 0; string_replace() rejects empty
   patterns before calling this function.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       would not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
	     const char *pat, int pat_len,	/* pattern string to find */
	     const char *sub, int sub_len,	/* substitution string */
	     int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len, delta;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Each replacement changes the length by delta bytes.  When the
	   string grows, make sure len + nfound*delta still fits in an
	   int; otherwise the buffer below would be undersized for the
	   memcpy() calls that fill it.  (When delta <= 0 the result is
	   never longer than the input, so no check is needed.) */
	delta = sub_len - pat_len;
	if (delta > 0 && nfound > (INT_MAX - len) / delta)
		return NULL;
	new_len = len + nfound*delta;

	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
1740
1741
1742static char replace__doc__[] =
1743"S.replace (old, new[, maxsplit]) -> string\n\
1744\n\
1745Return a copy of string S with all occurrences of substring\n\
1746old replaced by new. If the optional argument maxsplit is\n\
1747given, only the first maxsplit occurrences are replaced.";
1748
1749static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001750string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001751{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752 const char *str = PyString_AS_STRING(self), *sub, *repl;
1753 char *new_s;
1754 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1755 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001756 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001757 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001758
Guido van Rossum4c08d552000-03-10 22:55:18 +00001759 if (!PyArg_ParseTuple(args, "OO|i:replace",
1760 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001762
1763 if (PyString_Check(subobj)) {
1764 sub = PyString_AS_STRING(subobj);
1765 sub_len = PyString_GET_SIZE(subobj);
1766 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001767#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001768 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001769 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001770 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001771#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001772 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1773 return NULL;
1774
1775 if (PyString_Check(replobj)) {
1776 repl = PyString_AS_STRING(replobj);
1777 repl_len = PyString_GET_SIZE(replobj);
1778 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001779#ifdef Py_USING_UNICODE
Guido van Rossum4c08d552000-03-10 22:55:18 +00001780 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001781 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001782 subobj, replobj, count);
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001783#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001784 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1785 return NULL;
1786
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001787 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001788 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001789 return NULL;
1790 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001791 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792 if (new_s == NULL) {
1793 PyErr_NoMemory();
1794 return NULL;
1795 }
1796 if (out_len == -1) {
1797 /* we're returning another reference to self */
1798 new = (PyObject*)self;
1799 Py_INCREF(new);
1800 }
1801 else {
1802 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001803 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001804 }
1805 return new;
1806}
1807
1808
1809static char startswith__doc__[] =
1810"S.startswith(prefix[, start[, end]]) -> int\n\
1811\n\
1812Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1813optional start, test S beginning at that position. With optional end, stop\n\
1814comparing S at that position.";
1815
1816static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001817string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001819 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001821 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001822 int plen;
1823 int start = 0;
1824 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001825 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826
Guido van Rossumc6821402000-05-08 14:08:05 +00001827 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1828 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001829 return NULL;
1830 if (PyString_Check(subobj)) {
1831 prefix = PyString_AS_STRING(subobj);
1832 plen = PyString_GET_SIZE(subobj);
1833 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001834#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001835 else if (PyUnicode_Check(subobj)) {
1836 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001837 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001838 subobj, start, end, -1);
1839 if (rc == -1)
1840 return NULL;
1841 else
1842 return PyInt_FromLong((long) rc);
1843 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001844#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001845 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001846 return NULL;
1847
1848 /* adopt Java semantics for index out of range. it is legal for
1849 * offset to be == plen, but this only returns true if prefix is
1850 * the empty string.
1851 */
1852 if (start < 0 || start+plen > len)
1853 return PyInt_FromLong(0);
1854
1855 if (!memcmp(str+start, prefix, plen)) {
1856 /* did the match end after the specified end? */
1857 if (end < 0)
1858 return PyInt_FromLong(1);
1859 else if (end - start < plen)
1860 return PyInt_FromLong(0);
1861 else
1862 return PyInt_FromLong(1);
1863 }
1864 else return PyInt_FromLong(0);
1865}
1866
1867
1868static char endswith__doc__[] =
1869"S.endswith(suffix[, start[, end]]) -> int\n\
1870\n\
1871Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1872optional start, test S beginning at that position. With optional end, stop\n\
1873comparing S at that position.";
1874
1875static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001876string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001877{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001878 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001880 const char* suffix;
1881 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001882 int start = 0;
1883 int end = -1;
1884 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001885 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001886
Guido van Rossumc6821402000-05-08 14:08:05 +00001887 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1888 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001889 return NULL;
1890 if (PyString_Check(subobj)) {
1891 suffix = PyString_AS_STRING(subobj);
1892 slen = PyString_GET_SIZE(subobj);
1893 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001894#ifdef Py_USING_UNICODE
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001895 else if (PyUnicode_Check(subobj)) {
1896 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001897 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001898 subobj, start, end, +1);
1899 if (rc == -1)
1900 return NULL;
1901 else
1902 return PyInt_FromLong((long) rc);
1903 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00001904#endif
Guido van Rossum4c08d552000-03-10 22:55:18 +00001905 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001906 return NULL;
1907
Guido van Rossum4c08d552000-03-10 22:55:18 +00001908 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001909 return PyInt_FromLong(0);
1910
1911 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001912 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001913
Guido van Rossum4c08d552000-03-10 22:55:18 +00001914 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001915 return PyInt_FromLong(1);
1916 else return PyInt_FromLong(0);
1917}
1918
1919
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001920static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001921"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001922\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001923Encodes S using the codec registered for encoding. encoding defaults\n\
1924to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001925handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1926a ValueError. Other possible values are 'ignore' and 'replace'.";
1927
1928static PyObject *
1929string_encode(PyStringObject *self, PyObject *args)
1930{
1931 char *encoding = NULL;
1932 char *errors = NULL;
1933 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1934 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001935 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
1936}
1937
1938
1939static char decode__doc__[] =
1940"S.decode([encoding[,errors]]) -> object\n\
1941\n\
1942Decodes S using the codec registered for encoding. encoding defaults\n\
1943to the default encoding. errors may be given to set a different error\n\
1944handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1945a ValueError. Other possible values are 'ignore' and 'replace'.";
1946
1947static PyObject *
1948string_decode(PyStringObject *self, PyObject *args)
1949{
1950 char *encoding = NULL;
1951 char *errors = NULL;
1952 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
1953 return NULL;
1954 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001955}
1956
1957
Guido van Rossum4c08d552000-03-10 22:55:18 +00001958static char expandtabs__doc__[] =
1959"S.expandtabs([tabsize]) -> string\n\
1960\n\
1961Return a copy of S where all tab characters are expanded using spaces.\n\
1962If tabsize is not given, a tab size of 8 characters is assumed.";
1963
1964static PyObject*
1965string_expandtabs(PyStringObject *self, PyObject *args)
1966{
1967 const char *e, *p;
1968 char *q;
1969 int i, j;
1970 PyObject *u;
1971 int tabsize = 8;
1972
1973 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1974 return NULL;
1975
Thomas Wouters7e474022000-07-16 12:04:32 +00001976 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001977 i = j = 0;
1978 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1979 for (p = PyString_AS_STRING(self); p < e; p++)
1980 if (*p == '\t') {
1981 if (tabsize > 0)
1982 j += tabsize - (j % tabsize);
1983 }
1984 else {
1985 j++;
1986 if (*p == '\n' || *p == '\r') {
1987 i += j;
1988 j = 0;
1989 }
1990 }
1991
1992 /* Second pass: create output string and fill it */
1993 u = PyString_FromStringAndSize(NULL, i + j);
1994 if (!u)
1995 return NULL;
1996
1997 j = 0;
1998 q = PyString_AS_STRING(u);
1999
2000 for (p = PyString_AS_STRING(self); p < e; p++)
2001 if (*p == '\t') {
2002 if (tabsize > 0) {
2003 i = tabsize - (j % tabsize);
2004 j += i;
2005 while (i--)
2006 *q++ = ' ';
2007 }
2008 }
2009 else {
2010 j++;
2011 *q++ = *p;
2012 if (*p == '\n' || *p == '\r')
2013 j = 0;
2014 }
2015
2016 return u;
2017}
2018
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002019static
2020PyObject *pad(PyStringObject *self,
2021 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002022 int right,
2023 char fill)
2024{
2025 PyObject *u;
2026
2027 if (left < 0)
2028 left = 0;
2029 if (right < 0)
2030 right = 0;
2031
2032 if (left == 0 && right == 0) {
2033 Py_INCREF(self);
2034 return (PyObject *)self;
2035 }
2036
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002037 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002038 left + PyString_GET_SIZE(self) + right);
2039 if (u) {
2040 if (left)
2041 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002042 memcpy(PyString_AS_STRING(u) + left,
2043 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002044 PyString_GET_SIZE(self));
2045 if (right)
2046 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2047 fill, right);
2048 }
2049
2050 return u;
2051}
2052
2053static char ljust__doc__[] =
2054"S.ljust(width) -> string\n\
2055\n\
2056Return S left justified in a string of length width. Padding is\n\
2057done using spaces.";
2058
2059static PyObject *
2060string_ljust(PyStringObject *self, PyObject *args)
2061{
2062 int width;
2063 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2064 return NULL;
2065
2066 if (PyString_GET_SIZE(self) >= width) {
2067 Py_INCREF(self);
2068 return (PyObject*) self;
2069 }
2070
2071 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2072}
2073
2074
2075static char rjust__doc__[] =
2076"S.rjust(width) -> string\n\
2077\n\
2078Return S right justified in a string of length width. Padding is\n\
2079done using spaces.";
2080
2081static PyObject *
2082string_rjust(PyStringObject *self, PyObject *args)
2083{
2084 int width;
2085 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2086 return NULL;
2087
2088 if (PyString_GET_SIZE(self) >= width) {
2089 Py_INCREF(self);
2090 return (PyObject*) self;
2091 }
2092
2093 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2094}
2095
2096
2097static char center__doc__[] =
2098"S.center(width) -> string\n\
2099\n\
2100Return S centered in a string of length width. Padding is done\n\
2101using spaces.";
2102
2103static PyObject *
2104string_center(PyStringObject *self, PyObject *args)
2105{
2106 int marg, left;
2107 int width;
2108
2109 if (!PyArg_ParseTuple(args, "i:center", &width))
2110 return NULL;
2111
2112 if (PyString_GET_SIZE(self) >= width) {
2113 Py_INCREF(self);
2114 return (PyObject*) self;
2115 }
2116
2117 marg = width - PyString_GET_SIZE(self);
2118 left = marg / 2 + (marg & width & 1);
2119
2120 return pad(self, left, marg - left, ' ');
2121}
2122
2123#if 0
2124static char zfill__doc__[] =
2125"S.zfill(width) -> string\n\
2126\n\
2127Pad a numeric string x with zeros on the left, to fill a field\n\
2128of the specified width. The string x is never truncated.";
2129
2130static PyObject *
2131string_zfill(PyStringObject *self, PyObject *args)
2132{
2133 int fill;
2134 PyObject *u;
2135 char *str;
2136
2137 int width;
2138 if (!PyArg_ParseTuple(args, "i:zfill", &width))
2139 return NULL;
2140
2141 if (PyString_GET_SIZE(self) >= width) {
2142 Py_INCREF(self);
2143 return (PyObject*) self;
2144 }
2145
2146 fill = width - PyString_GET_SIZE(self);
2147
2148 u = pad(self, fill, 0, '0');
2149 if (u == NULL)
2150 return NULL;
2151
2152 str = PyString_AS_STRING(u);
2153 if (str[fill] == '+' || str[fill] == '-') {
2154 /* move sign to beginning of string */
2155 str[0] = str[fill];
2156 str[fill] = '0';
2157 }
2158
2159 return u;
2160}
2161#endif
2162
2163static char isspace__doc__[] =
2164"S.isspace() -> int\n\
2165\n\
2166Return 1 if there are only whitespace characters in S,\n\
21670 otherwise.";
2168
2169static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002170string_isspace(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171{
Fred Drakeba096332000-07-09 07:04:36 +00002172 register const unsigned char *p
2173 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002174 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 /* Shortcut for single character strings */
2177 if (PyString_GET_SIZE(self) == 1 &&
2178 isspace(*p))
2179 return PyInt_FromLong(1);
2180
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002181 /* Special case for empty strings */
2182 if (PyString_GET_SIZE(self) == 0)
2183 return PyInt_FromLong(0);
2184
Guido van Rossum4c08d552000-03-10 22:55:18 +00002185 e = p + PyString_GET_SIZE(self);
2186 for (; p < e; p++) {
2187 if (!isspace(*p))
2188 return PyInt_FromLong(0);
2189 }
2190 return PyInt_FromLong(1);
2191}
2192
2193
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002194static char isalpha__doc__[] =
2195"S.isalpha() -> int\n\
2196\n\
2197Return 1 if all characters in S are alphabetic\n\
2198and there is at least one character in S, 0 otherwise.";
2199
2200static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002201string_isalpha(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002202{
Fred Drakeba096332000-07-09 07:04:36 +00002203 register const unsigned char *p
2204 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002205 register const unsigned char *e;
2206
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002207 /* Shortcut for single character strings */
2208 if (PyString_GET_SIZE(self) == 1 &&
2209 isalpha(*p))
2210 return PyInt_FromLong(1);
2211
2212 /* Special case for empty strings */
2213 if (PyString_GET_SIZE(self) == 0)
2214 return PyInt_FromLong(0);
2215
2216 e = p + PyString_GET_SIZE(self);
2217 for (; p < e; p++) {
2218 if (!isalpha(*p))
2219 return PyInt_FromLong(0);
2220 }
2221 return PyInt_FromLong(1);
2222}
2223
2224
2225static char isalnum__doc__[] =
2226"S.isalnum() -> int\n\
2227\n\
2228Return 1 if all characters in S are alphanumeric\n\
2229and there is at least one character in S, 0 otherwise.";
2230
2231static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002232string_isalnum(PyStringObject *self)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002233{
Fred Drakeba096332000-07-09 07:04:36 +00002234 register const unsigned char *p
2235 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002236 register const unsigned char *e;
2237
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002238 /* Shortcut for single character strings */
2239 if (PyString_GET_SIZE(self) == 1 &&
2240 isalnum(*p))
2241 return PyInt_FromLong(1);
2242
2243 /* Special case for empty strings */
2244 if (PyString_GET_SIZE(self) == 0)
2245 return PyInt_FromLong(0);
2246
2247 e = p + PyString_GET_SIZE(self);
2248 for (; p < e; p++) {
2249 if (!isalnum(*p))
2250 return PyInt_FromLong(0);
2251 }
2252 return PyInt_FromLong(1);
2253}
2254
2255
Guido van Rossum4c08d552000-03-10 22:55:18 +00002256static char isdigit__doc__[] =
2257"S.isdigit() -> int\n\
2258\n\
2259Return 1 if there are only digit characters in S,\n\
22600 otherwise.";
2261
2262static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002263string_isdigit(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002264{
Fred Drakeba096332000-07-09 07:04:36 +00002265 register const unsigned char *p
2266 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002267 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268
Guido van Rossum4c08d552000-03-10 22:55:18 +00002269 /* Shortcut for single character strings */
2270 if (PyString_GET_SIZE(self) == 1 &&
2271 isdigit(*p))
2272 return PyInt_FromLong(1);
2273
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002274 /* Special case for empty strings */
2275 if (PyString_GET_SIZE(self) == 0)
2276 return PyInt_FromLong(0);
2277
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278 e = p + PyString_GET_SIZE(self);
2279 for (; p < e; p++) {
2280 if (!isdigit(*p))
2281 return PyInt_FromLong(0);
2282 }
2283 return PyInt_FromLong(1);
2284}
2285
2286
2287static char islower__doc__[] =
2288"S.islower() -> int\n\
2289\n\
2290Return 1 if all cased characters in S are lowercase and there is\n\
2291at least one cased character in S, 0 otherwise.";
2292
2293static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002294string_islower(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002295{
Fred Drakeba096332000-07-09 07:04:36 +00002296 register const unsigned char *p
2297 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002298 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002299 int cased;
2300
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301 /* Shortcut for single character strings */
2302 if (PyString_GET_SIZE(self) == 1)
2303 return PyInt_FromLong(islower(*p) != 0);
2304
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002305 /* Special case for empty strings */
2306 if (PyString_GET_SIZE(self) == 0)
2307 return PyInt_FromLong(0);
2308
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 e = p + PyString_GET_SIZE(self);
2310 cased = 0;
2311 for (; p < e; p++) {
2312 if (isupper(*p))
2313 return PyInt_FromLong(0);
2314 else if (!cased && islower(*p))
2315 cased = 1;
2316 }
2317 return PyInt_FromLong(cased);
2318}
2319
2320
2321static char isupper__doc__[] =
2322"S.isupper() -> int\n\
2323\n\
2324Return 1 if all cased characters in S are uppercase and there is\n\
2325at least one cased character in S, 0 otherwise.";
2326
2327static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002328string_isupper(PyStringObject *self)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002329{
Fred Drakeba096332000-07-09 07:04:36 +00002330 register const unsigned char *p
2331 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002332 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002333 int cased;
2334
Guido van Rossum4c08d552000-03-10 22:55:18 +00002335 /* Shortcut for single character strings */
2336 if (PyString_GET_SIZE(self) == 1)
2337 return PyInt_FromLong(isupper(*p) != 0);
2338
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002339 /* Special case for empty strings */
2340 if (PyString_GET_SIZE(self) == 0)
2341 return PyInt_FromLong(0);
2342
Guido van Rossum4c08d552000-03-10 22:55:18 +00002343 e = p + PyString_GET_SIZE(self);
2344 cased = 0;
2345 for (; p < e; p++) {
2346 if (islower(*p))
2347 return PyInt_FromLong(0);
2348 else if (!cased && isupper(*p))
2349 cased = 1;
2350 }
2351 return PyInt_FromLong(cased);
2352}
2353
2354
2355static char istitle__doc__[] =
2356"S.istitle() -> int\n\
2357\n\
2358Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2359may only follow uncased characters and lowercase characters only cased\n\
2360ones. Return 0 otherwise.";
2361
2362static PyObject*
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002363string_istitle(PyStringObject *self, PyObject *uncased)
Guido van Rossum4c08d552000-03-10 22:55:18 +00002364{
Fred Drakeba096332000-07-09 07:04:36 +00002365 register const unsigned char *p
2366 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002367 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 int cased, previous_is_cased;
2369
Guido van Rossum4c08d552000-03-10 22:55:18 +00002370 /* Shortcut for single character strings */
2371 if (PyString_GET_SIZE(self) == 1)
2372 return PyInt_FromLong(isupper(*p) != 0);
2373
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002374 /* Special case for empty strings */
2375 if (PyString_GET_SIZE(self) == 0)
2376 return PyInt_FromLong(0);
2377
Guido van Rossum4c08d552000-03-10 22:55:18 +00002378 e = p + PyString_GET_SIZE(self);
2379 cased = 0;
2380 previous_is_cased = 0;
2381 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002382 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383
2384 if (isupper(ch)) {
2385 if (previous_is_cased)
2386 return PyInt_FromLong(0);
2387 previous_is_cased = 1;
2388 cased = 1;
2389 }
2390 else if (islower(ch)) {
2391 if (!previous_is_cased)
2392 return PyInt_FromLong(0);
2393 previous_is_cased = 1;
2394 cased = 1;
2395 }
2396 else
2397 previous_is_cased = 0;
2398 }
2399 return PyInt_FromLong(cased);
2400}
2401
2402
2403static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002404"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002405\n\
2406Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002407Line breaks are not included in the resulting list unless keepends\n\
2408is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002409
2410#define SPLIT_APPEND(data, left, right) \
2411 str = PyString_FromStringAndSize(data + left, right - left); \
2412 if (!str) \
2413 goto onError; \
2414 if (PyList_Append(list, str)) { \
2415 Py_DECREF(str); \
2416 goto onError; \
2417 } \
2418 else \
2419 Py_DECREF(str);
2420
2421static PyObject*
2422string_splitlines(PyStringObject *self, PyObject *args)
2423{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002424 register int i;
2425 register int j;
2426 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002427 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428 PyObject *list;
2429 PyObject *str;
2430 char *data;
2431
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002432 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 return NULL;
2434
2435 data = PyString_AS_STRING(self);
2436 len = PyString_GET_SIZE(self);
2437
Guido van Rossum4c08d552000-03-10 22:55:18 +00002438 list = PyList_New(0);
2439 if (!list)
2440 goto onError;
2441
2442 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002443 int eol;
2444
Guido van Rossum4c08d552000-03-10 22:55:18 +00002445 /* Find a line and append it */
2446 while (i < len && data[i] != '\n' && data[i] != '\r')
2447 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002448
2449 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002450 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002451 if (i < len) {
2452 if (data[i] == '\r' && i + 1 < len &&
2453 data[i+1] == '\n')
2454 i += 2;
2455 else
2456 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002457 if (keepends)
2458 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002459 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002460 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002461 j = i;
2462 }
2463 if (j < len) {
2464 SPLIT_APPEND(data, j, len);
2465 }
2466
2467 return list;
2468
2469 onError:
2470 Py_DECREF(list);
2471 return NULL;
2472}
2473
2474#undef SPLIT_APPEND
2475
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002476
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002477static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002478string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002479 /* Counterparts of the obsolete stropmodule functions; except
2480 string.maketrans(). */
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002481 {"join", (PyCFunction)string_join, METH_O, join__doc__},
2482 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
2483 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
2484 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
2485 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
2486 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
2487 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
2488 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
2489 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
2490 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
2491 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
2492 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, capitalize__doc__},
2493 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
2494 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, endswith__doc__},
2495 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
2496 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
2497 {"lstrip", (PyCFunction)string_lstrip, METH_NOARGS, lstrip__doc__},
2498 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
2499 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
2500 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
2501 {"rstrip", (PyCFunction)string_rstrip, METH_NOARGS, rstrip__doc__},
2502 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, startswith__doc__},
2503 {"strip", (PyCFunction)string_strip, METH_NOARGS, strip__doc__},
2504 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS, swapcase__doc__},
2505 {"translate", (PyCFunction)string_translate, METH_VARARGS, translate__doc__},
2506 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
2507 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
2508 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
2509 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
2510 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
2511 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
2512 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, expandtabs__doc__},
2513 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, splitlines__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002514#if 0
Martin v. Löwise3eb1f22001-08-16 13:15:00 +00002515 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002516#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002517 {NULL, NULL} /* sentinel */
2518};
2519
2520static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002521string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002522{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002523 PyObject *x = NULL;
2524 static char *kwlist[] = {"object", 0};
2525
2526 assert(type == &PyString_Type);
2527 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2528 return NULL;
2529 if (x == NULL)
2530 return PyString_FromString("");
2531 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002532}
2533
/* Docstring of the str type (installed as tp_doc of PyString_Type). */
static char string_doc[] =
	"str(object) -> string\n"
	"\n"
	"Return a nice string representation of the object.\n"
	"If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002539
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002540PyTypeObject PyString_Type = {
2541 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002542 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002543 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002544 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002545 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002546 (destructor)string_dealloc, /* tp_dealloc */
2547 (printfunc)string_print, /* tp_print */
2548 0, /* tp_getattr */
2549 0, /* tp_setattr */
2550 0, /* tp_compare */
2551 (reprfunc)string_repr, /* tp_repr */
2552 0, /* tp_as_number */
2553 &string_as_sequence, /* tp_as_sequence */
2554 0, /* tp_as_mapping */
2555 (hashfunc)string_hash, /* tp_hash */
2556 0, /* tp_call */
2557 (reprfunc)string_str, /* tp_str */
2558 PyObject_GenericGetAttr, /* tp_getattro */
2559 0, /* tp_setattro */
2560 &string_as_buffer, /* tp_as_buffer */
2561 Py_TPFLAGS_DEFAULT, /* tp_flags */
2562 string_doc, /* tp_doc */
2563 0, /* tp_traverse */
2564 0, /* tp_clear */
2565 (richcmpfunc)string_richcompare, /* tp_richcompare */
2566 0, /* tp_weaklistoffset */
2567 0, /* tp_iter */
2568 0, /* tp_iternext */
2569 string_methods, /* tp_methods */
2570 0, /* tp_members */
2571 0, /* tp_getset */
2572 0, /* tp_base */
2573 0, /* tp_dict */
2574 0, /* tp_descr_get */
2575 0, /* tp_descr_set */
2576 0, /* tp_dictoffset */
2577 0, /* tp_init */
2578 0, /* tp_alloc */
2579 string_new, /* tp_new */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002580};
2581
2582void
Fred Drakeba096332000-07-09 07:04:36 +00002583PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002584{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002585 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002586 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002587 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002588 if (w == NULL || !PyString_Check(*pv)) {
2589 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002590 *pv = NULL;
2591 return;
2592 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002593 v = string_concat((PyStringObject *) *pv, w);
2594 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002595 *pv = v;
2596}
2597
Guido van Rossum013142a1994-08-30 08:19:36 +00002598void
Fred Drakeba096332000-07-09 07:04:36 +00002599PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002600{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002601 PyString_Concat(pv, w);
2602 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002603}
2604
2605
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002606/* The following function breaks the notion that strings are immutable:
2607 it changes the size of a string. We get away with this only if there
2608 is only one module referencing the object. You can also think of it
2609 as creating a new string object and destroying the old one, only
2610 more efficiently. In any case, don't use this if the string may
2611 already be known to some other part of the code... */
2612
2613int
Fred Drakeba096332000-07-09 07:04:36 +00002614_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002615{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002616 register PyObject *v;
2617 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002618 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002619 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002620 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002621 Py_DECREF(v);
2622 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002623 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002624 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002625 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002626#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002627 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002628#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002629 _Py_ForgetReference(v);
2630 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002631 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002632 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002633 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002634 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002635 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002636 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002637 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002638 _Py_NewReference(*pv);
2639 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002640 sv->ob_size = newsize;
2641 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002642 return 0;
2643}
Guido van Rossume5372401993-03-16 12:15:04 +00002644
2645/* Helpers for formatstring */
2646
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002647static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002648getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002649{
2650 int argidx = *p_argidx;
2651 if (argidx < arglen) {
2652 (*p_argidx)++;
2653 if (arglen < 0)
2654 return args;
2655 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002656 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002657 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002658 PyErr_SetString(PyExc_TypeError,
2659 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002660 return NULL;
2661}
2662
/* Format flag bits, one per flag character of a % conversion
 * specification.  They are OR-ed together into a single `flags` int.
 */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
2675
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002676static int
Fred Drakeba096332000-07-09 07:04:36 +00002677formatfloat(char *buf, size_t buflen, int flags,
2678 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002679{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002680 /* fmt = '%#.' + `prec` + `type`
2681 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002682 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002683 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002684 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002685 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002686 if (prec < 0)
2687 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002688 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2689 type = 'g';
2690 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002691 /* worst case length calc to ensure no buffer overrun:
2692 fmt = %#.<prec>g
2693 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002694 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002695 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2696 If prec=0 the effective precision is 1 (the leading digit is
2697 always given), therefore increase by one to 10+prec. */
2698 if (buflen <= (size_t)10 + (size_t)prec) {
2699 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002700 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002701 return -1;
2702 }
Guido van Rossume5372401993-03-16 12:15:04 +00002703 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002704 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002705}
2706
Tim Peters38fd5b62000-09-21 05:43:11 +00002707/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2708 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2709 * Python's regular ints.
2710 * Return value: a new PyString*, or NULL if error.
2711 * . *pbuf is set to point into it,
2712 * *plen set to the # of chars following that.
2713 * Caller must decref it when done using pbuf.
2714 * The string starting at *pbuf is of the form
2715 * "-"? ("0x" | "0X")? digit+
2716 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002717 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002718 * There will be at least prec digits, zero-filled on the left if
2719 * necessary to get that many.
2720 * val object to be converted
2721 * flags bitmask of format flags; only F_ALT is looked at
2722 * prec minimum number of digits; 0-fill on left if needed
2723 * type a character in [duoxX]; u acts the same as d
2724 *
2725 * CAUTION: o, x and X conversions on regular ints can never
2726 * produce a '-' sign, but can for Python's unbounded ints.
2727 */
2728PyObject*
2729_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2730 char **pbuf, int *plen)
2731{
2732 PyObject *result = NULL;
2733 char *buf;
2734 int i;
2735 int sign; /* 1 if '-', else 0 */
2736 int len; /* number of characters */
2737 int numdigits; /* len == numnondigits + numdigits */
2738 int numnondigits = 0;
2739
2740 switch (type) {
2741 case 'd':
2742 case 'u':
2743 result = val->ob_type->tp_str(val);
2744 break;
2745 case 'o':
2746 result = val->ob_type->tp_as_number->nb_oct(val);
2747 break;
2748 case 'x':
2749 case 'X':
2750 numnondigits = 2;
2751 result = val->ob_type->tp_as_number->nb_hex(val);
2752 break;
2753 default:
2754 assert(!"'type' not in [duoxX]");
2755 }
2756 if (!result)
2757 return NULL;
2758
2759 /* To modify the string in-place, there can only be one reference. */
2760 if (result->ob_refcnt != 1) {
2761 PyErr_BadInternalCall();
2762 return NULL;
2763 }
2764 buf = PyString_AsString(result);
2765 len = PyString_Size(result);
2766 if (buf[len-1] == 'L') {
2767 --len;
2768 buf[len] = '\0';
2769 }
2770 sign = buf[0] == '-';
2771 numnondigits += sign;
2772 numdigits = len - numnondigits;
2773 assert(numdigits > 0);
2774
Tim Petersfff53252001-04-12 18:38:48 +00002775 /* Get rid of base marker unless F_ALT */
2776 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002777 /* Need to skip 0x, 0X or 0. */
2778 int skipped = 0;
2779 switch (type) {
2780 case 'o':
2781 assert(buf[sign] == '0');
2782 /* If 0 is only digit, leave it alone. */
2783 if (numdigits > 1) {
2784 skipped = 1;
2785 --numdigits;
2786 }
2787 break;
2788 case 'x':
2789 case 'X':
2790 assert(buf[sign] == '0');
2791 assert(buf[sign + 1] == 'x');
2792 skipped = 2;
2793 numnondigits -= 2;
2794 break;
2795 }
2796 if (skipped) {
2797 buf += skipped;
2798 len -= skipped;
2799 if (sign)
2800 buf[0] = '-';
2801 }
2802 assert(len == numnondigits + numdigits);
2803 assert(numdigits > 0);
2804 }
2805
2806 /* Fill with leading zeroes to meet minimum width. */
2807 if (prec > numdigits) {
2808 PyObject *r1 = PyString_FromStringAndSize(NULL,
2809 numnondigits + prec);
2810 char *b1;
2811 if (!r1) {
2812 Py_DECREF(result);
2813 return NULL;
2814 }
2815 b1 = PyString_AS_STRING(r1);
2816 for (i = 0; i < numnondigits; ++i)
2817 *b1++ = *buf++;
2818 for (i = 0; i < prec - numdigits; i++)
2819 *b1++ = '0';
2820 for (i = 0; i < numdigits; i++)
2821 *b1++ = *buf++;
2822 *b1 = '\0';
2823 Py_DECREF(result);
2824 result = r1;
2825 buf = PyString_AS_STRING(result);
2826 len = numnondigits + prec;
2827 }
2828
2829 /* Fix up case for hex conversions. */
2830 switch (type) {
2831 case 'x':
2832 /* Need to convert all upper case letters to lower case. */
2833 for (i = 0; i < len; i++)
2834 if (buf[i] >= 'A' && buf[i] <= 'F')
2835 buf[i] += 'a'-'A';
2836 break;
2837 case 'X':
2838 /* Need to convert 0x to 0X (and -0x to -0X). */
2839 if (buf[sign + 1] == 'x')
2840 buf[sign + 1] = 'X';
2841 break;
2842 }
2843 *pbuf = buf;
2844 *plen = len;
2845 return result;
2846}
2847
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002848static int
Fred Drakeba096332000-07-09 07:04:36 +00002849formatint(char *buf, size_t buflen, int flags,
2850 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002851{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002852 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002853 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2854 + 1 + 1 = 24 */
2855 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002856 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002857 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002858 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002859 if (prec < 0)
2860 prec = 1;
2861 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002862 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002863 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002864 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002865 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002866 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002867 return -1;
2868 }
Guido van Rossume5372401993-03-16 12:15:04 +00002869 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002870 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2871 * but we want it (for consistency with other %#x conversions, and
2872 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002873 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2874 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2875 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002876 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002877 if (x == 0 &&
2878 (flags & F_ALT) &&
2879 (type == 'x' || type == 'X') &&
2880 buf[1] != (char)type) /* this last always true under std C */
2881 {
Tim Petersfff53252001-04-12 18:38:48 +00002882 memmove(buf+2, buf, strlen(buf) + 1);
2883 buf[0] = '0';
2884 buf[1] = (char)type;
2885 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002886 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002887}
2888
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002889static int
Fred Drakeba096332000-07-09 07:04:36 +00002890formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002891{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002892 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002893 if (PyString_Check(v)) {
2894 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002895 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002896 }
2897 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002898 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002899 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002900 }
2901 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002902 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002903}
2904
Guido van Rossum013142a1994-08-30 08:19:36 +00002905
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the macro behaves as a
   single operand in any expression (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002915
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002916PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002917PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002918{
2919 char *fmt, *res;
2920 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002921 int args_owned = 0;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002922 PyObject *result, *orig_args;
2923#ifdef Py_USING_UNICODE
2924 PyObject *v, *w;
2925#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002926 PyObject *dict = NULL;
2927 if (format == NULL || !PyString_Check(format) || args == NULL) {
2928 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002929 return NULL;
2930 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002931 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002932 fmt = PyString_AsString(format);
2933 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002934 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002935 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002936 if (result == NULL)
2937 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002938 res = PyString_AsString(result);
2939 if (PyTuple_Check(args)) {
2940 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002941 argidx = 0;
2942 }
2943 else {
2944 arglen = -1;
2945 argidx = -2;
2946 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002947 if (args->ob_type->tp_as_mapping)
2948 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002949 while (--fmtcnt >= 0) {
2950 if (*fmt != '%') {
2951 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002952 rescnt = fmtcnt + 100;
2953 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002954 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002955 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002956 res = PyString_AsString(result)
2957 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002958 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002959 }
2960 *res++ = *fmt++;
2961 }
2962 else {
2963 /* Got a format specifier */
2964 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002965 int width = -1;
2966 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00002967 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002968 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002969 PyObject *v = NULL;
2970 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002971 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002972 int sign;
2973 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002974 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002975#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00002976 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002977 int argidx_start = argidx;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00002978#endif
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002979
Guido van Rossumda9c2711996-12-05 21:58:58 +00002980 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002981 if (*fmt == '(') {
2982 char *keystart;
2983 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002984 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002985 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002986
2987 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002988 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002989 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002990 goto error;
2991 }
2992 ++fmt;
2993 --fmtcnt;
2994 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002995 /* Skip over balanced parentheses */
2996 while (pcount > 0 && --fmtcnt >= 0) {
2997 if (*fmt == ')')
2998 --pcount;
2999 else if (*fmt == '(')
3000 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003001 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003002 }
3003 keylen = fmt - keystart - 1;
3004 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003005 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003006 "incomplete format key");
3007 goto error;
3008 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003009 key = PyString_FromStringAndSize(keystart,
3010 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003011 if (key == NULL)
3012 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003013 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003014 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003015 args_owned = 0;
3016 }
3017 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003018 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003019 if (args == NULL) {
3020 goto error;
3021 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003022 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003023 arglen = -1;
3024 argidx = -2;
3025 }
Guido van Rossume5372401993-03-16 12:15:04 +00003026 while (--fmtcnt >= 0) {
3027 switch (c = *fmt++) {
3028 case '-': flags |= F_LJUST; continue;
3029 case '+': flags |= F_SIGN; continue;
3030 case ' ': flags |= F_BLANK; continue;
3031 case '#': flags |= F_ALT; continue;
3032 case '0': flags |= F_ZERO; continue;
3033 }
3034 break;
3035 }
3036 if (c == '*') {
3037 v = getnextarg(args, arglen, &argidx);
3038 if (v == NULL)
3039 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003040 if (!PyInt_Check(v)) {
3041 PyErr_SetString(PyExc_TypeError,
3042 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003043 goto error;
3044 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003045 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003046 if (width < 0) {
3047 flags |= F_LJUST;
3048 width = -width;
3049 }
Guido van Rossume5372401993-03-16 12:15:04 +00003050 if (--fmtcnt >= 0)
3051 c = *fmt++;
3052 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003053 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003054 width = c - '0';
3055 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003056 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003057 if (!isdigit(c))
3058 break;
3059 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003060 PyErr_SetString(
3061 PyExc_ValueError,
3062 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003063 goto error;
3064 }
3065 width = width*10 + (c - '0');
3066 }
3067 }
3068 if (c == '.') {
3069 prec = 0;
3070 if (--fmtcnt >= 0)
3071 c = *fmt++;
3072 if (c == '*') {
3073 v = getnextarg(args, arglen, &argidx);
3074 if (v == NULL)
3075 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003076 if (!PyInt_Check(v)) {
3077 PyErr_SetString(
3078 PyExc_TypeError,
3079 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003080 goto error;
3081 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003082 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003083 if (prec < 0)
3084 prec = 0;
3085 if (--fmtcnt >= 0)
3086 c = *fmt++;
3087 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003088 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003089 prec = c - '0';
3090 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003091 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003092 if (!isdigit(c))
3093 break;
3094 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003095 PyErr_SetString(
3096 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003097 "prec too big");
3098 goto error;
3099 }
3100 prec = prec*10 + (c - '0');
3101 }
3102 }
3103 } /* prec */
3104 if (fmtcnt >= 0) {
3105 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003106 if (--fmtcnt >= 0)
3107 c = *fmt++;
3108 }
3109 }
3110 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003111 PyErr_SetString(PyExc_ValueError,
3112 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003113 goto error;
3114 }
3115 if (c != '%') {
3116 v = getnextarg(args, arglen, &argidx);
3117 if (v == NULL)
3118 goto error;
3119 }
3120 sign = 0;
3121 fill = ' ';
3122 switch (c) {
3123 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003124 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003125 len = 1;
3126 break;
3127 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003128 case 'r':
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003129#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003130 if (PyUnicode_Check(v)) {
3131 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003132 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003133 goto unicode;
3134 }
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003135#endif
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003136 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003137 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003138 else
3139 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003140 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003141 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003142 if (!PyString_Check(temp)) {
3143 PyErr_SetString(PyExc_TypeError,
3144 "%s argument has non-string str()");
3145 goto error;
3146 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003147 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003148 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003149 if (prec >= 0 && len > prec)
3150 len = prec;
3151 break;
3152 case 'i':
3153 case 'd':
3154 case 'u':
3155 case 'o':
3156 case 'x':
3157 case 'X':
3158 if (c == 'i')
3159 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003160 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003161 temp = _PyString_FormatLong(v, flags,
3162 prec, c, &pbuf, &len);
3163 if (!temp)
3164 goto error;
3165 /* unbounded ints can always produce
3166 a sign character! */
3167 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003168 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003169 else {
3170 pbuf = formatbuf;
3171 len = formatint(pbuf, sizeof(formatbuf),
3172 flags, prec, c, v);
3173 if (len < 0)
3174 goto error;
3175 /* only d conversion is signed */
3176 sign = c == 'd';
3177 }
3178 if (flags & F_ZERO)
3179 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003180 break;
3181 case 'e':
3182 case 'E':
3183 case 'f':
3184 case 'g':
3185 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003186 pbuf = formatbuf;
3187 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003188 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003189 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003190 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003191 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003192 fill = '0';
3193 break;
3194 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003195 pbuf = formatbuf;
3196 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003197 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003198 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003199 break;
3200 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003201 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003202 "unsupported format character '%c' (0x%x) "
3203 "at index %i",
3204 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003205 goto error;
3206 }
3207 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003208 if (*pbuf == '-' || *pbuf == '+') {
3209 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003210 len--;
3211 }
3212 else if (flags & F_SIGN)
3213 sign = '+';
3214 else if (flags & F_BLANK)
3215 sign = ' ';
3216 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003217 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003218 }
3219 if (width < len)
3220 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003221 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003222 reslen -= rescnt;
3223 rescnt = width + fmtcnt + 100;
3224 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003225 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003226 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003227 res = PyString_AsString(result)
3228 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003229 }
3230 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003231 if (fill != ' ')
3232 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003233 rescnt--;
3234 if (width > len)
3235 width--;
3236 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003237 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3238 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003239 assert(pbuf[1] == c);
3240 if (fill != ' ') {
3241 *res++ = *pbuf++;
3242 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003243 }
Tim Petersfff53252001-04-12 18:38:48 +00003244 rescnt -= 2;
3245 width -= 2;
3246 if (width < 0)
3247 width = 0;
3248 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003249 }
3250 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003251 do {
3252 --rescnt;
3253 *res++ = fill;
3254 } while (--width > len);
3255 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003256 if (fill == ' ') {
3257 if (sign)
3258 *res++ = sign;
3259 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003260 (c == 'x' || c == 'X')) {
3261 assert(pbuf[0] == '0');
3262 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003263 *res++ = *pbuf++;
3264 *res++ = *pbuf++;
3265 }
3266 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003267 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003268 res += len;
3269 rescnt -= len;
3270 while (--width >= len) {
3271 --rescnt;
3272 *res++ = ' ';
3273 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003274 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003275 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003276 "not all arguments converted");
3277 goto error;
3278 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003279 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003280 } /* '%' */
3281 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003282 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003283 PyErr_SetString(PyExc_TypeError,
3284 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003285 goto error;
3286 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003287 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003288 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003289 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003290 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003291 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003292
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003293#ifdef Py_USING_UNICODE
Guido van Rossum90daa872000-04-10 13:47:21 +00003294 unicode:
3295 if (args_owned) {
3296 Py_DECREF(args);
3297 args_owned = 0;
3298 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003299 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003300 if (PyTuple_Check(orig_args) && argidx > 0) {
3301 PyObject *v;
3302 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3303 v = PyTuple_New(n);
3304 if (v == NULL)
3305 goto error;
3306 while (--n >= 0) {
3307 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3308 Py_INCREF(w);
3309 PyTuple_SET_ITEM(v, n, w);
3310 }
3311 args = v;
3312 } else {
3313 Py_INCREF(orig_args);
3314 args = orig_args;
3315 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003316 args_owned = 1;
3317 /* Take what we have of the result and let the Unicode formatting
3318 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003319 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003320 if (_PyString_Resize(&result, rescnt))
3321 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003322 fmtcnt = PyString_GET_SIZE(format) - \
3323 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003324 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3325 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003326 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003327 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003328 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003329 if (v == NULL)
3330 goto error;
3331 /* Paste what we have (result) to what the Unicode formatting
3332 function returned (v) and return the result (or error) */
3333 w = PyUnicode_Concat(result, v);
3334 Py_DECREF(result);
3335 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003336 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003337 return w;
Martin v. Löwis339d0f72001-08-17 18:39:25 +00003338#endif /* Py_USING_UNICODE */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003339
Guido van Rossume5372401993-03-16 12:15:04 +00003340 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003341 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003342 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003343 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003344 }
Guido van Rossume5372401993-03-16 12:15:04 +00003345 return NULL;
3346}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003347
3348
3349#ifdef INTERN_STRINGS
3350
Barry Warsaw4df762f2000-08-16 23:41:01 +00003351/* Table of interned strings.  PyString_InternInPlace() creates it lazily
 * on first use and stores each interned string object as both the key and
 * the value of its entry.
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003363static PyObject *interned;
3364
3365void
Fred Drakeba096332000-07-09 07:04:36 +00003366PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003367{
3368 register PyStringObject *s = (PyStringObject *)(*p);
3369 PyObject *t;
3370 if (s == NULL || !PyString_Check(s))
3371 Py_FatalError("PyString_InternInPlace: strings only please!");
3372 if ((t = s->ob_sinterned) != NULL) {
3373 if (t == (PyObject *)s)
3374 return;
3375 Py_INCREF(t);
3376 *p = t;
3377 Py_DECREF(s);
3378 return;
3379 }
3380 if (interned == NULL) {
3381 interned = PyDict_New();
3382 if (interned == NULL)
3383 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003384 }
3385 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3386 Py_INCREF(t);
3387 *p = s->ob_sinterned = t;
3388 Py_DECREF(s);
3389 return;
3390 }
3391 t = (PyObject *)s;
3392 if (PyDict_SetItem(interned, t, t) == 0) {
3393 s->ob_sinterned = t;
3394 return;
3395 }
3396 PyErr_Clear();
3397}
3398
3399
3400PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003401PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003402{
3403 PyObject *s = PyString_FromString(cp);
3404 if (s == NULL)
3405 return NULL;
3406 PyString_InternInPlace(&s);
3407 return s;
3408}
3409
3410#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003411
3412void
Fred Drakeba096332000-07-09 07:04:36 +00003413PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003414{
3415 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003416 for (i = 0; i < UCHAR_MAX + 1; i++) {
3417 Py_XDECREF(characters[i]);
3418 characters[i] = NULL;
3419 }
3420#ifndef DONT_SHARE_SHORT_STRINGS
3421 Py_XDECREF(nullstring);
3422 nullstring = NULL;
3423#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003424#ifdef INTERN_STRINGS
3425 if (interned) {
3426 int pos, changed;
3427 PyObject *key, *value;
3428 do {
3429 changed = 0;
3430 pos = 0;
3431 while (PyDict_Next(interned, &pos, &key, &value)) {
3432 if (key->ob_refcnt == 2 && key == value) {
3433 PyDict_DelItem(interned, key);
3434 changed = 1;
3435 }
3436 }
3437 } while (changed);
3438 }
3439#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003440}
Barry Warsawa903ad982001-02-23 16:40:48 +00003441
3442#ifdef INTERN_STRINGS
3443void _Py_ReleaseInternedStrings(void)
3444{
3445 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003446 fprintf(stderr, "releasing interned strings\n");
3447 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003448 Py_DECREF(interned);
3449 interned = NULL;
3450 }
3451}
3452#endif /* INTERN_STRINGS */