blob: c7f5062a0fb1d191524cbf2838cdc4a35003f101 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in
   certain cases to share string objects.  When the size of the string
   is zero, these routines always return a pointer to the same string
   object; when the size is one, they return a pointer to an already
   existing object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   A common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of
   objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000150PyObject *PyString_Decode(const char *s,
151 int size,
152 const char *encoding,
153 const char *errors)
154{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000155 PyObject *v, *str;
156
157 str = PyString_FromStringAndSize(s, size);
158 if (str == NULL)
159 return NULL;
160 v = PyString_AsDecodedString(str, encoding, errors);
161 Py_DECREF(str);
162 return v;
163}
164
165PyObject *PyString_AsDecodedObject(PyObject *str,
166 const char *encoding,
167 const char *errors)
168{
169 PyObject *v;
170
171 if (!PyString_Check(str)) {
172 PyErr_BadArgument();
173 goto onError;
174 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000175
176 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000177 encoding = PyUnicode_GetDefaultEncoding();
178
179 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000180 v = PyCodec_Decode(str, encoding, errors);
181 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000182 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000183
184 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000185
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000186 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000187 return NULL;
188}
189
190PyObject *PyString_AsDecodedString(PyObject *str,
191 const char *encoding,
192 const char *errors)
193{
194 PyObject *v;
195
196 v = PyString_AsDecodedObject(str, encoding, errors);
197 if (v == NULL)
198 goto onError;
199
200 /* Convert Unicode to a string using the default encoding */
201 if (PyUnicode_Check(v)) {
202 PyObject *temp = v;
203 v = PyUnicode_AsEncodedString(v, NULL, NULL);
204 Py_DECREF(temp);
205 if (v == NULL)
206 goto onError;
207 }
208 if (!PyString_Check(v)) {
209 PyErr_Format(PyExc_TypeError,
210 "decoder did not return a string object (type=%.400s)",
211 v->ob_type->tp_name);
212 Py_DECREF(v);
213 goto onError;
214 }
215
216 return v;
217
218 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000219 return NULL;
220}
221
222PyObject *PyString_Encode(const char *s,
223 int size,
224 const char *encoding,
225 const char *errors)
226{
227 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000228
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000229 str = PyString_FromStringAndSize(s, size);
230 if (str == NULL)
231 return NULL;
232 v = PyString_AsEncodedString(str, encoding, errors);
233 Py_DECREF(str);
234 return v;
235}
236
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000237PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000238 const char *encoding,
239 const char *errors)
240{
241 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000242
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000243 if (!PyString_Check(str)) {
244 PyErr_BadArgument();
245 goto onError;
246 }
247
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000248 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000249 encoding = PyUnicode_GetDefaultEncoding();
250
251 /* Encode via the codec registry */
252 v = PyCodec_Encode(str, encoding, errors);
253 if (v == NULL)
254 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000255
256 return v;
257
258 onError:
259 return NULL;
260}
261
262PyObject *PyString_AsEncodedString(PyObject *str,
263 const char *encoding,
264 const char *errors)
265{
266 PyObject *v;
267
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000268 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000269 if (v == NULL)
270 goto onError;
271
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000272 /* Convert Unicode to a string using the default encoding */
273 if (PyUnicode_Check(v)) {
274 PyObject *temp = v;
275 v = PyUnicode_AsEncodedString(v, NULL, NULL);
276 Py_DECREF(temp);
277 if (v == NULL)
278 goto onError;
279 }
280 if (!PyString_Check(v)) {
281 PyErr_Format(PyExc_TypeError,
282 "encoder did not return a string object (type=%.400s)",
283 v->ob_type->tp_name);
284 Py_DECREF(v);
285 goto onError;
286 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000287
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000288 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000289
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000290 onError:
291 return NULL;
292}
293
Guido van Rossum234f9421993-06-17 12:35:49 +0000294static void
Fred Drakeba096332000-07-09 07:04:36 +0000295string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000296{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000297 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000298}
299
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000300static int
301string_getsize(register PyObject *op)
302{
303 char *s;
304 int len;
305 if (PyString_AsStringAndSize(op, &s, &len))
306 return -1;
307 return len;
308}
309
310static /*const*/ char *
311string_getbuffer(register PyObject *op)
312{
313 char *s;
314 int len;
315 if (PyString_AsStringAndSize(op, &s, &len))
316 return NULL;
317 return s;
318}
319
Guido van Rossumd7047b31995-01-02 19:07:15 +0000320int
Fred Drakeba096332000-07-09 07:04:36 +0000321PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000323 if (!PyString_Check(op))
324 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326}
327
328/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000329PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000331 if (!PyString_Check(op))
332 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000333 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334}
335
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000336int
337PyString_AsStringAndSize(register PyObject *obj,
338 register char **s,
339 register int *len)
340{
341 if (s == NULL) {
342 PyErr_BadInternalCall();
343 return -1;
344 }
345
346 if (!PyString_Check(obj)) {
347 if (PyUnicode_Check(obj)) {
348 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
349 if (obj == NULL)
350 return -1;
351 }
352 else {
353 PyErr_Format(PyExc_TypeError,
354 "expected string or Unicode object, "
355 "%.200s found", obj->ob_type->tp_name);
356 return -1;
357 }
358 }
359
360 *s = PyString_AS_STRING(obj);
361 if (len != NULL)
362 *len = PyString_GET_SIZE(obj);
363 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
364 PyErr_SetString(PyExc_TypeError,
365 "expected string without null bytes");
366 return -1;
367 }
368 return 0;
369}
370
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371/* Methods */
372
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000373static int
Fred Drakeba096332000-07-09 07:04:36 +0000374string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000375{
376 int i;
377 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000378 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000379 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000380 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000381 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000382 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000383 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000384
Thomas Wouters7e474022000-07-16 12:04:32 +0000385 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000386 quote = '\'';
387 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
388 quote = '"';
389
390 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391 for (i = 0; i < op->ob_size; i++) {
392 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000393 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000394 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000395 else if (c == '\t')
396 fprintf(fp, "\\t");
397 else if (c == '\n')
398 fprintf(fp, "\\n");
399 else if (c == '\r')
400 fprintf(fp, "\\r");
401 else if (c < ' ' || c >= 0x7f)
402 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000403 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000404 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000406 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000407 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000408}
409
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000410static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000411string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000413 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
414 PyObject *v;
415 if (newsize > INT_MAX) {
416 PyErr_SetString(PyExc_OverflowError,
417 "string is too large to make repr");
418 }
419 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000420 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000421 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000422 }
423 else {
424 register int i;
425 register char c;
426 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000427 int quote;
428
Thomas Wouters7e474022000-07-16 12:04:32 +0000429 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000430 quote = '\'';
431 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
432 quote = '"';
433
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000434 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000435 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436 for (i = 0; i < op->ob_size; i++) {
437 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000438 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000440 else if (c == '\t')
441 *p++ = '\\', *p++ = 't';
442 else if (c == '\n')
443 *p++ = '\\', *p++ = 'n';
444 else if (c == '\r')
445 *p++ = '\\', *p++ = 'r';
446 else if (c < ' ' || c >= 0x7f) {
447 sprintf(p, "\\x%02x", c & 0xff);
448 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449 }
450 else
451 *p++ = c;
452 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000453 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000455 _PyString_Resize(
456 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000457 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459}
460
Guido van Rossum189f1df2001-05-01 16:51:53 +0000461static PyObject *
462string_str(PyObject *s)
463{
464 Py_INCREF(s);
465 return s;
466}
467
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000468static int
Fred Drakeba096332000-07-09 07:04:36 +0000469string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000470{
471 return a->ob_size;
472}
473
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000475string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000476{
477 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000478 register PyStringObject *op;
479 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000480 if (PyUnicode_Check(bb))
481 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000482 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000483 "cannot add type \"%.200s\" to string",
484 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000485 return NULL;
486 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000487#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000488 /* Optimize cases with empty left or right operand */
489 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000491 return bb;
492 }
493 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000494 Py_INCREF(a);
495 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000496 }
497 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000498 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000499 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000500 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000501 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000502 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000503 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000504#ifdef CACHE_HASH
505 op->ob_shash = -1;
506#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000507#ifdef INTERN_STRINGS
508 op->ob_sinterned = NULL;
509#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000510 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
511 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
512 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514#undef b
515}
516
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000517static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000518string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519{
520 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000521 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000522 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000523 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524 if (n < 0)
525 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000526 /* watch out for overflows: the size can overflow int,
527 * and the # of bytes needed can overflow size_t
528 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000530 if (n && size / n != a->ob_size) {
531 PyErr_SetString(PyExc_OverflowError,
532 "repeated string is too long");
533 return NULL;
534 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000535 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000536 Py_INCREF(a);
537 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000538 }
Tim Peters8f422462000-09-09 06:13:41 +0000539 nbytes = size * sizeof(char);
540 if (nbytes / sizeof(char) != (size_t)size ||
541 nbytes + sizeof(PyStringObject) <= nbytes) {
542 PyErr_SetString(PyExc_OverflowError,
543 "repeated string is too long");
544 return NULL;
545 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000546 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000547 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000548 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000549 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000550 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000551#ifdef CACHE_HASH
552 op->ob_shash = -1;
553#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000554#ifdef INTERN_STRINGS
555 op->ob_sinterned = NULL;
556#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000557 for (i = 0; i < size; i += a->ob_size)
558 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
559 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000560 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000561}
562
563/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
564
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000566string_slice(register PyStringObject *a, register int i, register int j)
567 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568{
569 if (i < 0)
570 i = 0;
571 if (j < 0)
572 j = 0; /* Avoid signed/unsigned bug in next line */
573 if (j > a->ob_size)
574 j = a->ob_size;
575 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000576 Py_INCREF(a);
577 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000578 }
579 if (j < i)
580 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000581 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000582}
583
Guido van Rossum9284a572000-03-07 15:53:43 +0000584static int
Fred Drakeba096332000-07-09 07:04:36 +0000585string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000586{
587 register char *s, *end;
588 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000589 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000590 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000591 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000592 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000593 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000594 return -1;
595 }
596 c = PyString_AsString(el)[0];
597 s = PyString_AsString(a);
598 end = s + PyString_Size(a);
599 while (s < end) {
600 if (c == *s++)
601 return 1;
602 }
603 return 0;
604}
605
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000606static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000607string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000609 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000610 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000611 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000612 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613 return NULL;
614 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000615 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000616 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000617 if (v == NULL)
618 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000619 else {
620#ifdef COUNT_ALLOCS
621 one_strings++;
622#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000623 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000624 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000625 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000626}
627
Martin v. Löwiscd353062001-05-24 16:56:35 +0000628static PyObject*
629string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000630{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000631 int c;
632 int len_a, len_b;
633 int min_len;
634 PyObject *result;
635
636 /* One of the objects is a string object. Make sure the
637 other one is one, too. */
638 if (a->ob_type != b->ob_type) {
639 result = Py_NotImplemented;
640 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000641 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000642 if (a == b) {
643 switch (op) {
644 case Py_EQ:case Py_LE:case Py_GE:
645 result = Py_True;
646 goto out;
647 case Py_NE:case Py_LT:case Py_GT:
648 result = Py_False;
649 goto out;
650 }
651 }
652 if (op == Py_EQ) {
653 /* Supporting Py_NE here as well does not save
654 much time, since Py_NE is rarely used. */
655 if (a->ob_size == b->ob_size
656 && (a->ob_sval[0] == b->ob_sval[0]
657 && memcmp(a->ob_sval, b->ob_sval,
658 a->ob_size) == 0)) {
659 result = Py_True;
660 } else {
661 result = Py_False;
662 }
663 goto out;
664 }
665 len_a = a->ob_size; len_b = b->ob_size;
666 min_len = (len_a < len_b) ? len_a : len_b;
667 if (min_len > 0) {
668 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
669 if (c==0)
670 c = memcmp(a->ob_sval, b->ob_sval, min_len);
671 }else
672 c = 0;
673 if (c == 0)
674 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
675 switch (op) {
676 case Py_LT: c = c < 0; break;
677 case Py_LE: c = c <= 0; break;
678 case Py_EQ: assert(0); break; /* unreachable */
679 case Py_NE: c = c != 0; break;
680 case Py_GT: c = c > 0; break;
681 case Py_GE: c = c >= 0; break;
682 default:
683 result = Py_NotImplemented;
684 goto out;
685 }
686 result = c ? Py_True : Py_False;
687 out:
688 Py_INCREF(result);
689 return result;
690}
691
692int
693_PyString_Eq(PyObject *o1, PyObject *o2)
694{
695 PyStringObject *a, *b;
696 a = (PyStringObject*)o1;
697 b = (PyStringObject*)o2;
698 return a->ob_size == b->ob_size
699 && *a->ob_sval == *b->ob_sval
700 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701}
702
Guido van Rossum9bfef441993-03-29 10:43:31 +0000703static long
Fred Drakeba096332000-07-09 07:04:36 +0000704string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000705{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000706 register int len;
707 register unsigned char *p;
708 register long x;
709
710#ifdef CACHE_HASH
711 if (a->ob_shash != -1)
712 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000713#ifdef INTERN_STRINGS
714 if (a->ob_sinterned != NULL)
715 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000717#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000718#endif
719 len = a->ob_size;
720 p = (unsigned char *) a->ob_sval;
721 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000722 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000723 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000724 x ^= a->ob_size;
725 if (x == -1)
726 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000727#ifdef CACHE_HASH
728 a->ob_shash = x;
729#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000730 return x;
731}
732
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000733static int
Fred Drakeba096332000-07-09 07:04:36 +0000734string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000735{
736 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000737 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000738 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000739 return -1;
740 }
741 *ptr = (void *)self->ob_sval;
742 return self->ob_size;
743}
744
745static int
Fred Drakeba096332000-07-09 07:04:36 +0000746string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000747{
Guido van Rossum045e6881997-09-08 18:30:11 +0000748 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000749 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000750 return -1;
751}
752
753static int
Fred Drakeba096332000-07-09 07:04:36 +0000754string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000755{
756 if ( lenp )
757 *lenp = self->ob_size;
758 return 1;
759}
760
Guido van Rossum1db70701998-10-08 02:18:52 +0000761static int
Fred Drakeba096332000-07-09 07:04:36 +0000762string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000763{
764 if ( index != 0 ) {
765 PyErr_SetString(PyExc_SystemError,
766 "accessing non-existent string segment");
767 return -1;
768 }
769 *ptr = self->ob_sval;
770 return self->ob_size;
771}
772
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000773static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000774 (inquiry)string_length, /*sq_length*/
775 (binaryfunc)string_concat, /*sq_concat*/
776 (intargfunc)string_repeat, /*sq_repeat*/
777 (intargfunc)string_item, /*sq_item*/
778 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000779 0, /*sq_ass_item*/
780 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000781 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782};
783
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000784static PyBufferProcs string_as_buffer = {
785 (getreadbufferproc)string_buffer_getreadbuf,
786 (getwritebufferproc)string_buffer_getwritebuf,
787 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000788 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000789};
790
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000791
792
/* Selector values for the striptype argument of do_strip(). */
#define LEFTSTRIP   0   /* remove leading whitespace only */
#define RIGHTSTRIP  1   /* remove trailing whitespace only */
#define BOTHSTRIP   2   /* remove whitespace at both ends */
796
797
798static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000799split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000800{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000801 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000802 PyObject* item;
803 PyObject *list = PyList_New(0);
804
805 if (list == NULL)
806 return NULL;
807
Guido van Rossum4c08d552000-03-10 22:55:18 +0000808 for (i = j = 0; i < len; ) {
809 while (i < len && isspace(Py_CHARMASK(s[i])))
810 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000811 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000812 while (i < len && !isspace(Py_CHARMASK(s[i])))
813 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000814 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000815 if (maxsplit-- <= 0)
816 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000817 item = PyString_FromStringAndSize(s+j, (int)(i-j));
818 if (item == NULL)
819 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000820 err = PyList_Append(list, item);
821 Py_DECREF(item);
822 if (err < 0)
823 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000824 while (i < len && isspace(Py_CHARMASK(s[i])))
825 i++;
826 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000827 }
828 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000829 if (j < len) {
830 item = PyString_FromStringAndSize(s+j, (int)(len - j));
831 if (item == NULL)
832 goto finally;
833 err = PyList_Append(list, item);
834 Py_DECREF(item);
835 if (err < 0)
836 goto finally;
837 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000838 return list;
839 finally:
840 Py_DECREF(list);
841 return NULL;
842}
843
844
845static char split__doc__[] =
846"S.split([sep [,maxsplit]]) -> list of strings\n\
847\n\
848Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000849delimiter string. If maxsplit is given, at most maxsplit\n\
850splits are done. If sep is not specified, any whitespace string\n\
851is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000852
853static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000854string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000855{
856 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000857 int maxsplit = -1;
858 const char *s = PyString_AS_STRING(self), *sub;
859 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000860
Guido van Rossum4c08d552000-03-10 22:55:18 +0000861 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000862 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000863 if (maxsplit < 0)
864 maxsplit = INT_MAX;
865 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000866 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000867 if (PyString_Check(subobj)) {
868 sub = PyString_AS_STRING(subobj);
869 n = PyString_GET_SIZE(subobj);
870 }
871 else if (PyUnicode_Check(subobj))
872 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
873 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
874 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875 if (n == 0) {
876 PyErr_SetString(PyExc_ValueError, "empty separator");
877 return NULL;
878 }
879
880 list = PyList_New(0);
881 if (list == NULL)
882 return NULL;
883
884 i = j = 0;
885 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000886 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000887 if (maxsplit-- <= 0)
888 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889 item = PyString_FromStringAndSize(s+j, (int)(i-j));
890 if (item == NULL)
891 goto fail;
892 err = PyList_Append(list, item);
893 Py_DECREF(item);
894 if (err < 0)
895 goto fail;
896 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000897 }
898 else
899 i++;
900 }
901 item = PyString_FromStringAndSize(s+j, (int)(len-j));
902 if (item == NULL)
903 goto fail;
904 err = PyList_Append(list, item);
905 Py_DECREF(item);
906 if (err < 0)
907 goto fail;
908
909 return list;
910
911 fail:
912 Py_DECREF(list);
913 return NULL;
914}
915
916
917static char join__doc__[] =
918"S.join(sequence) -> string\n\
919\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000920Return a string which is the concatenation of the strings in the\n\
921sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000922
923static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000924string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000925{
926 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000927 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000928 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000929 char *p;
930 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000931 size_t sz = 0;
932 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000933 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000934
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000935 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000936 return NULL;
937
Tim Peters19fe14e2001-01-19 03:03:47 +0000938 seq = PySequence_Fast(orig, "");
939 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000940 if (PyErr_ExceptionMatches(PyExc_TypeError))
941 PyErr_Format(PyExc_TypeError,
942 "sequence expected, %.80s found",
943 orig->ob_type->tp_name);
944 return NULL;
945 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000946
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000947 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000948 if (seqlen == 0) {
949 Py_DECREF(seq);
950 return PyString_FromString("");
951 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000952 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000953 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000954 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
955 PyErr_Format(PyExc_TypeError,
956 "sequence item 0: expected string,"
957 " %.80s found",
958 item->ob_type->tp_name);
959 Py_DECREF(seq);
960 return NULL;
961 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000962 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000963 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000964 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000965 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000966
Tim Peters19fe14e2001-01-19 03:03:47 +0000967 /* There are at least two things to join. Do a pre-pass to figure out
968 * the total amount of space we'll need (sz), see whether any argument
969 * is absurd, and defer to the Unicode join if appropriate.
970 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000971 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000972 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000973 item = PySequence_Fast_GET_ITEM(seq, i);
974 if (!PyString_Check(item)){
975 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000976 /* Defer to Unicode join.
977 * CAUTION: There's no gurantee that the
978 * original sequence can be iterated over
979 * again, so we must pass seq here.
980 */
981 PyObject *result;
982 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000983 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +0000984 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000985 }
986 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000987 "sequence item %i: expected string,"
988 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000989 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000990 Py_DECREF(seq);
991 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000992 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000993 sz += PyString_GET_SIZE(item);
994 if (i != 0)
995 sz += seplen;
996 if (sz < old_sz || sz > INT_MAX) {
997 PyErr_SetString(PyExc_OverflowError,
998 "join() is too long for a Python string");
999 Py_DECREF(seq);
1000 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001001 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001002 }
1003
1004 /* Allocate result space. */
1005 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1006 if (res == NULL) {
1007 Py_DECREF(seq);
1008 return NULL;
1009 }
1010
1011 /* Catenate everything. */
1012 p = PyString_AS_STRING(res);
1013 for (i = 0; i < seqlen; ++i) {
1014 size_t n;
1015 item = PySequence_Fast_GET_ITEM(seq, i);
1016 n = PyString_GET_SIZE(item);
1017 memcpy(p, PyString_AS_STRING(item), n);
1018 p += n;
1019 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001020 memcpy(p, sep, seplen);
1021 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001022 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001023 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001024
Jeremy Hylton49048292000-07-11 03:28:17 +00001025 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001026 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001027}
1028
Tim Peters52e155e2001-06-16 05:42:57 +00001029PyObject *
1030_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001031{
1032 PyObject* args;
1033 PyObject* result = NULL;
1034
1035 assert(sep != NULL && PyString_Check(sep));
1036 assert(x != NULL);
1037 args = PyTuple_New(1);
1038 if (args != NULL) {
1039 Py_INCREF(x);
1040 PyTuple_SET_ITEM(args, 0, x);
1041 result = string_join((PyStringObject *)sep, args);
1042 Py_DECREF(args);
1043 }
1044 return result;
1045}
1046
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001047static long
Fred Drakeba096332000-07-09 07:04:36 +00001048string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001049{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001050 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001051 int len = PyString_GET_SIZE(self);
1052 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001053 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001055 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001056 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001057 return -2;
1058 if (PyString_Check(subobj)) {
1059 sub = PyString_AS_STRING(subobj);
1060 n = PyString_GET_SIZE(subobj);
1061 }
1062 else if (PyUnicode_Check(subobj))
1063 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
1064 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065 return -2;
1066
1067 if (last > len)
1068 last = len;
1069 if (last < 0)
1070 last += len;
1071 if (last < 0)
1072 last = 0;
1073 if (i < 0)
1074 i += len;
1075 if (i < 0)
1076 i = 0;
1077
Guido van Rossum4c08d552000-03-10 22:55:18 +00001078 if (dir > 0) {
1079 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001080 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001081 last -= n;
1082 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001083 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001084 return (long)i;
1085 }
1086 else {
1087 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001088
Guido van Rossum4c08d552000-03-10 22:55:18 +00001089 if (n == 0 && i <= last)
1090 return (long)last;
1091 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001092 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001093 return (long)j;
1094 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001095
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001096 return -1;
1097}
1098
1099
1100static char find__doc__[] =
1101"S.find(sub [,start [,end]]) -> int\n\
1102\n\
1103Return the lowest index in S where substring sub is found,\n\
1104such that sub is contained within s[start,end]. Optional\n\
1105arguments start and end are interpreted as in slice notation.\n\
1106\n\
1107Return -1 on failure.";
1108
1109static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001110string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001111{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001112 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113 if (result == -2)
1114 return NULL;
1115 return PyInt_FromLong(result);
1116}
1117
1118
1119static char index__doc__[] =
1120"S.index(sub [,start [,end]]) -> int\n\
1121\n\
1122Like S.find() but raise ValueError when the substring is not found.";
1123
1124static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001125string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001127 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001128 if (result == -2)
1129 return NULL;
1130 if (result == -1) {
1131 PyErr_SetString(PyExc_ValueError,
1132 "substring not found in string.index");
1133 return NULL;
1134 }
1135 return PyInt_FromLong(result);
1136}
1137
1138
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001139static char rfind__doc__[] =
1140"S.rfind(sub [,start [,end]]) -> int\n\
1141\n\
1142Return the highest index in S where substring sub is found,\n\
1143such that sub is contained within s[start,end]. Optional\n\
1144arguments start and end are interpreted as in slice notation.\n\
1145\n\
1146Return -1 on failure.";
1147
1148static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001149string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001150{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001151 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152 if (result == -2)
1153 return NULL;
1154 return PyInt_FromLong(result);
1155}
1156
1157
1158static char rindex__doc__[] =
1159"S.rindex(sub [,start [,end]]) -> int\n\
1160\n\
1161Like S.rfind() but raise ValueError when the substring is not found.";
1162
1163static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001164string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001166 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001167 if (result == -2)
1168 return NULL;
1169 if (result == -1) {
1170 PyErr_SetString(PyExc_ValueError,
1171 "substring not found in string.rindex");
1172 return NULL;
1173 }
1174 return PyInt_FromLong(result);
1175}
1176
1177
1178static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001179do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180{
1181 char *s = PyString_AS_STRING(self);
1182 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001183
Guido van Rossum43713e52000-02-29 13:59:29 +00001184 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001185 return NULL;
1186
1187 i = 0;
1188 if (striptype != RIGHTSTRIP) {
1189 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1190 i++;
1191 }
1192 }
1193
1194 j = len;
1195 if (striptype != LEFTSTRIP) {
1196 do {
1197 j--;
1198 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1199 j++;
1200 }
1201
1202 if (i == 0 && j == len) {
1203 Py_INCREF(self);
1204 return (PyObject*)self;
1205 }
1206 else
1207 return PyString_FromStringAndSize(s+i, j-i);
1208}
1209
1210
1211static char strip__doc__[] =
1212"S.strip() -> string\n\
1213\n\
1214Return a copy of the string S with leading and trailing\n\
1215whitespace removed.";
1216
1217static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001218string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219{
1220 return do_strip(self, args, BOTHSTRIP);
1221}
1222
1223
1224static char lstrip__doc__[] =
1225"S.lstrip() -> string\n\
1226\n\
1227Return a copy of the string S with leading whitespace removed.";
1228
1229static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001230string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231{
1232 return do_strip(self, args, LEFTSTRIP);
1233}
1234
1235
1236static char rstrip__doc__[] =
1237"S.rstrip() -> string\n\
1238\n\
1239Return a copy of the string S with trailing whitespace removed.";
1240
1241static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001242string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001243{
1244 return do_strip(self, args, RIGHTSTRIP);
1245}
1246
1247
1248static char lower__doc__[] =
1249"S.lower() -> string\n\
1250\n\
1251Return a copy of the string S converted to lowercase.";
1252
1253static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001254string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001255{
1256 char *s = PyString_AS_STRING(self), *s_new;
1257 int i, n = PyString_GET_SIZE(self);
1258 PyObject *new;
1259
Guido van Rossum43713e52000-02-29 13:59:29 +00001260 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001261 return NULL;
1262 new = PyString_FromStringAndSize(NULL, n);
1263 if (new == NULL)
1264 return NULL;
1265 s_new = PyString_AsString(new);
1266 for (i = 0; i < n; i++) {
1267 int c = Py_CHARMASK(*s++);
1268 if (isupper(c)) {
1269 *s_new = tolower(c);
1270 } else
1271 *s_new = c;
1272 s_new++;
1273 }
1274 return new;
1275}
1276
1277
1278static char upper__doc__[] =
1279"S.upper() -> string\n\
1280\n\
1281Return a copy of the string S converted to uppercase.";
1282
1283static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001284string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001285{
1286 char *s = PyString_AS_STRING(self), *s_new;
1287 int i, n = PyString_GET_SIZE(self);
1288 PyObject *new;
1289
Guido van Rossum43713e52000-02-29 13:59:29 +00001290 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001291 return NULL;
1292 new = PyString_FromStringAndSize(NULL, n);
1293 if (new == NULL)
1294 return NULL;
1295 s_new = PyString_AsString(new);
1296 for (i = 0; i < n; i++) {
1297 int c = Py_CHARMASK(*s++);
1298 if (islower(c)) {
1299 *s_new = toupper(c);
1300 } else
1301 *s_new = c;
1302 s_new++;
1303 }
1304 return new;
1305}
1306
1307
Guido van Rossum4c08d552000-03-10 22:55:18 +00001308static char title__doc__[] =
1309"S.title() -> string\n\
1310\n\
1311Return a titlecased version of S, i.e. words start with uppercase\n\
1312characters, all remaining cased characters have lowercase.";
1313
1314static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001315string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316{
1317 char *s = PyString_AS_STRING(self), *s_new;
1318 int i, n = PyString_GET_SIZE(self);
1319 int previous_is_cased = 0;
1320 PyObject *new;
1321
1322 if (!PyArg_ParseTuple(args, ":title"))
1323 return NULL;
1324 new = PyString_FromStringAndSize(NULL, n);
1325 if (new == NULL)
1326 return NULL;
1327 s_new = PyString_AsString(new);
1328 for (i = 0; i < n; i++) {
1329 int c = Py_CHARMASK(*s++);
1330 if (islower(c)) {
1331 if (!previous_is_cased)
1332 c = toupper(c);
1333 previous_is_cased = 1;
1334 } else if (isupper(c)) {
1335 if (previous_is_cased)
1336 c = tolower(c);
1337 previous_is_cased = 1;
1338 } else
1339 previous_is_cased = 0;
1340 *s_new++ = c;
1341 }
1342 return new;
1343}
1344
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001345static char capitalize__doc__[] =
1346"S.capitalize() -> string\n\
1347\n\
1348Return a copy of the string S with only its first character\n\
1349capitalized.";
1350
1351static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001352string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001353{
1354 char *s = PyString_AS_STRING(self), *s_new;
1355 int i, n = PyString_GET_SIZE(self);
1356 PyObject *new;
1357
Guido van Rossum43713e52000-02-29 13:59:29 +00001358 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001359 return NULL;
1360 new = PyString_FromStringAndSize(NULL, n);
1361 if (new == NULL)
1362 return NULL;
1363 s_new = PyString_AsString(new);
1364 if (0 < n) {
1365 int c = Py_CHARMASK(*s++);
1366 if (islower(c))
1367 *s_new = toupper(c);
1368 else
1369 *s_new = c;
1370 s_new++;
1371 }
1372 for (i = 1; i < n; i++) {
1373 int c = Py_CHARMASK(*s++);
1374 if (isupper(c))
1375 *s_new = tolower(c);
1376 else
1377 *s_new = c;
1378 s_new++;
1379 }
1380 return new;
1381}
1382
1383
1384static char count__doc__[] =
1385"S.count(sub[, start[, end]]) -> int\n\
1386\n\
1387Return the number of occurrences of substring sub in string\n\
1388S[start:end]. Optional arguments start and end are\n\
1389interpreted as in slice notation.";
1390
1391static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001392string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001394 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001395 int len = PyString_GET_SIZE(self), n;
1396 int i = 0, last = INT_MAX;
1397 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399
Guido van Rossumc6821402000-05-08 14:08:05 +00001400 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1401 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001403
Guido van Rossum4c08d552000-03-10 22:55:18 +00001404 if (PyString_Check(subobj)) {
1405 sub = PyString_AS_STRING(subobj);
1406 n = PyString_GET_SIZE(subobj);
1407 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001408 else if (PyUnicode_Check(subobj)) {
1409 int count;
1410 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1411 if (count == -1)
1412 return NULL;
1413 else
1414 return PyInt_FromLong((long) count);
1415 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001416 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1417 return NULL;
1418
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 if (last > len)
1420 last = len;
1421 if (last < 0)
1422 last += len;
1423 if (last < 0)
1424 last = 0;
1425 if (i < 0)
1426 i += len;
1427 if (i < 0)
1428 i = 0;
1429 m = last + 1 - n;
1430 if (n == 0)
1431 return PyInt_FromLong((long) (m-i));
1432
1433 r = 0;
1434 while (i < m) {
1435 if (!memcmp(s+i, sub, n)) {
1436 r++;
1437 i += n;
1438 } else {
1439 i++;
1440 }
1441 }
1442 return PyInt_FromLong((long) r);
1443}
1444
1445
1446static char swapcase__doc__[] =
1447"S.swapcase() -> string\n\
1448\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001449Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450converted to lowercase and vice versa.";
1451
1452static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001453string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454{
1455 char *s = PyString_AS_STRING(self), *s_new;
1456 int i, n = PyString_GET_SIZE(self);
1457 PyObject *new;
1458
Guido van Rossum43713e52000-02-29 13:59:29 +00001459 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 return NULL;
1461 new = PyString_FromStringAndSize(NULL, n);
1462 if (new == NULL)
1463 return NULL;
1464 s_new = PyString_AsString(new);
1465 for (i = 0; i < n; i++) {
1466 int c = Py_CHARMASK(*s++);
1467 if (islower(c)) {
1468 *s_new = toupper(c);
1469 }
1470 else if (isupper(c)) {
1471 *s_new = tolower(c);
1472 }
1473 else
1474 *s_new = c;
1475 s_new++;
1476 }
1477 return new;
1478}
1479
1480
1481static char translate__doc__[] =
1482"S.translate(table [,deletechars]) -> string\n\
1483\n\
1484Return a copy of the string S, where all characters occurring\n\
1485in the optional argument deletechars are removed, and the\n\
1486remaining characters have been mapped through the given\n\
1487translation table, which must be a string of length 256.";
1488
1489static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001490string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492 register char *input, *output;
1493 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 register int i, c, changed = 0;
1495 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001496 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497 int inlen, tablen, dellen = 0;
1498 PyObject *result;
1499 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001500 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001501
Guido van Rossum4c08d552000-03-10 22:55:18 +00001502 if (!PyArg_ParseTuple(args, "O|O:translate",
1503 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001505
1506 if (PyString_Check(tableobj)) {
1507 table1 = PyString_AS_STRING(tableobj);
1508 tablen = PyString_GET_SIZE(tableobj);
1509 }
1510 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001511 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001512 parameter; instead a mapping to None will cause characters
1513 to be deleted. */
1514 if (delobj != NULL) {
1515 PyErr_SetString(PyExc_TypeError,
1516 "deletions are implemented differently for unicode");
1517 return NULL;
1518 }
1519 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1520 }
1521 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001523
1524 if (delobj != NULL) {
1525 if (PyString_Check(delobj)) {
1526 del_table = PyString_AS_STRING(delobj);
1527 dellen = PyString_GET_SIZE(delobj);
1528 }
1529 else if (PyUnicode_Check(delobj)) {
1530 PyErr_SetString(PyExc_TypeError,
1531 "deletions are implemented differently for unicode");
1532 return NULL;
1533 }
1534 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1535 return NULL;
1536
1537 if (tablen != 256) {
1538 PyErr_SetString(PyExc_ValueError,
1539 "translation table must be 256 characters long");
1540 return NULL;
1541 }
1542 }
1543 else {
1544 del_table = NULL;
1545 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546 }
1547
1548 table = table1;
1549 inlen = PyString_Size(input_obj);
1550 result = PyString_FromStringAndSize((char *)NULL, inlen);
1551 if (result == NULL)
1552 return NULL;
1553 output_start = output = PyString_AsString(result);
1554 input = PyString_AsString(input_obj);
1555
1556 if (dellen == 0) {
1557 /* If no deletions are required, use faster code */
1558 for (i = inlen; --i >= 0; ) {
1559 c = Py_CHARMASK(*input++);
1560 if (Py_CHARMASK((*output++ = table[c])) != c)
1561 changed = 1;
1562 }
1563 if (changed)
1564 return result;
1565 Py_DECREF(result);
1566 Py_INCREF(input_obj);
1567 return input_obj;
1568 }
1569
1570 for (i = 0; i < 256; i++)
1571 trans_table[i] = Py_CHARMASK(table[i]);
1572
1573 for (i = 0; i < dellen; i++)
1574 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1575
1576 for (i = inlen; --i >= 0; ) {
1577 c = Py_CHARMASK(*input++);
1578 if (trans_table[c] != -1)
1579 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1580 continue;
1581 changed = 1;
1582 }
1583 if (!changed) {
1584 Py_DECREF(result);
1585 Py_INCREF(input_obj);
1586 return input_obj;
1587 }
1588 /* Fix the size of the resulting string */
1589 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1590 return NULL;
1591 return result;
1592}
1593
1594
1595/* What follows is used for implementing replace(). Perry Stoll. */
1596
/*
  mymemfind

  strstr-like search over arbitrary blocks of memory.

  Looks for the first place in the LEN bytes at MEM where the PAT_LEN
  bytes at PAT occur.  Returns the offset into MEM of that place, or -1
  if there is none.  A pattern longer than MEM is never found, so -1 is
  returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot begin later than this offset. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* Compare the first byte before paying for memcmp(). */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1622
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning from
  the left and resuming right after each match:
      mem=1111  and pat=11 gives 2,
      mem=11111 and pat=11 gives 2 as well.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int count = 0;
    int hit;

    while (len >= 0 && (hit = mymemfind(mem, len, pat, pat_len)) != -1) {
        /* Skip everything up to and including this match. */
        const int consumed = hit + pat_len;

        mem += consumed;
        len -= consumed;
        count++;
    }
    return count;
}
1646
1647/*
1648 mymemreplace
1649
Thomas Wouters7e474022000-07-16 12:04:32 +00001650 Return a string in which all occurrences of PAT in memory STR are
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001651 replaced with SUB.
1652
Thomas Wouters7e474022000-07-16 12:04:32 +00001653 If length of PAT is less than length of STR or there are no occurrences
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654 of PAT in STR, then the original string is returned. Otherwise, a new
1655 string is allocated here and returned.
1656
1657 on return, out_len is:
1658 the length of output string, or
1659 -1 if the input string is returned, or
1660 unchanged if an error occurs (no memory).
1661
1662 return value is:
1663 the new string allocated locally, or
1664 NULL if an error occurred.
1665*/
1666static char *
Tim Petersc2e7da92000-07-09 08:02:21 +00001667mymemreplace(const char *str, int len, /* input string */
1668 const char *pat, int pat_len, /* pattern string to find */
1669 const char *sub, int sub_len, /* substitution string */
1670 int count, /* number of replacements */
Tim Peters4cd44ef2001-05-10 00:05:33 +00001671 int *out_len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672{
1673 char *out_s;
1674 char *new_s;
1675 int nfound, offset, new_len;
1676
1677 if (len == 0 || pat_len > len)
1678 goto return_same;
1679
1680 /* find length of output string */
1681 nfound = mymemcnt(str, len, pat, pat_len);
Tim Peters9c012af2001-05-10 00:32:57 +00001682 if (count < 0)
1683 count = INT_MAX;
1684 else if (nfound > count)
1685 nfound = count;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001686 if (nfound == 0)
1687 goto return_same;
Tim Peters4cd44ef2001-05-10 00:05:33 +00001688
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001689 new_len = len + nfound*(sub_len - pat_len);
Tim Peters4cd44ef2001-05-10 00:05:33 +00001690 if (new_len == 0) {
1691 /* Have to allocate something for the caller to free(). */
1692 out_s = (char *)PyMem_MALLOC(1);
Tim Peters9c012af2001-05-10 00:32:57 +00001693 if (out_s == NULL)
Tim Peters4cd44ef2001-05-10 00:05:33 +00001694 return NULL;
1695 out_s[0] = '\0';
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001696 }
Tim Peters4cd44ef2001-05-10 00:05:33 +00001697 else {
1698 assert(new_len > 0);
1699 new_s = (char *)PyMem_MALLOC(new_len);
1700 if (new_s == NULL)
1701 return NULL;
1702 out_s = new_s;
1703
Tim Peters9c012af2001-05-10 00:32:57 +00001704 for (; count > 0 && len > 0; --count) {
Tim Peters4cd44ef2001-05-10 00:05:33 +00001705 /* find index of next instance of pattern */
1706 offset = mymemfind(str, len, pat, pat_len);
1707 if (offset == -1)
1708 break;
1709
1710 /* copy non matching part of input string */
1711 memcpy(new_s, str, offset);
1712 str += offset + pat_len;
1713 len -= offset + pat_len;
1714
1715 /* copy substitute into the output string */
1716 new_s += offset;
1717 memcpy(new_s, sub, sub_len);
1718 new_s += sub_len;
Tim Peters4cd44ef2001-05-10 00:05:33 +00001719 }
1720 /* copy any remaining values into output string */
1721 if (len > 0)
1722 memcpy(new_s, str, len);
1723 }
1724 *out_len = new_len;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725 return out_s;
1726
1727 return_same:
1728 *out_len = -1;
Tim Peters4cd44ef2001-05-10 00:05:33 +00001729 return (char *)str; /* cast away const */
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730}
1731
1732
1733static char replace__doc__[] =
1734"S.replace (old, new[, maxsplit]) -> string\n\
1735\n\
1736Return a copy of string S with all occurrences of substring\n\
1737old replaced by new. If the optional argument maxsplit is\n\
1738given, only the first maxsplit occurrences are replaced.";
1739
1740static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001741string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001743 const char *str = PyString_AS_STRING(self), *sub, *repl;
1744 char *new_s;
1745 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1746 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750 if (!PyArg_ParseTuple(args, "OO|i:replace",
1751 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753
1754 if (PyString_Check(subobj)) {
1755 sub = PyString_AS_STRING(subobj);
1756 sub_len = PyString_GET_SIZE(subobj);
1757 }
1758 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001759 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 subobj, replobj, count);
1761 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1762 return NULL;
1763
1764 if (PyString_Check(replobj)) {
1765 repl = PyString_AS_STRING(replobj);
1766 repl_len = PyString_GET_SIZE(replobj);
1767 }
1768 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001769 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001770 subobj, replobj, count);
1771 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1772 return NULL;
1773
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001774 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001775 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776 return NULL;
1777 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001778 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779 if (new_s == NULL) {
1780 PyErr_NoMemory();
1781 return NULL;
1782 }
1783 if (out_len == -1) {
1784 /* we're returning another reference to self */
1785 new = (PyObject*)self;
1786 Py_INCREF(new);
1787 }
1788 else {
1789 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001790 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791 }
1792 return new;
1793}
1794
1795
1796static char startswith__doc__[] =
1797"S.startswith(prefix[, start[, end]]) -> int\n\
1798\n\
1799Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1800optional start, test S beginning at that position. With optional end, stop\n\
1801comparing S at that position.";
1802
1803static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001804string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001806 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001808 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809 int plen;
1810 int start = 0;
1811 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001812 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813
Guido van Rossumc6821402000-05-08 14:08:05 +00001814 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1815 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001816 return NULL;
1817 if (PyString_Check(subobj)) {
1818 prefix = PyString_AS_STRING(subobj);
1819 plen = PyString_GET_SIZE(subobj);
1820 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001821 else if (PyUnicode_Check(subobj)) {
1822 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001823 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001824 subobj, start, end, -1);
1825 if (rc == -1)
1826 return NULL;
1827 else
1828 return PyInt_FromLong((long) rc);
1829 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001830 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 return NULL;
1832
1833 /* adopt Java semantics for index out of range. it is legal for
1834 * offset to be == plen, but this only returns true if prefix is
1835 * the empty string.
1836 */
1837 if (start < 0 || start+plen > len)
1838 return PyInt_FromLong(0);
1839
1840 if (!memcmp(str+start, prefix, plen)) {
1841 /* did the match end after the specified end? */
1842 if (end < 0)
1843 return PyInt_FromLong(1);
1844 else if (end - start < plen)
1845 return PyInt_FromLong(0);
1846 else
1847 return PyInt_FromLong(1);
1848 }
1849 else return PyInt_FromLong(0);
1850}
1851
1852
1853static char endswith__doc__[] =
1854"S.endswith(suffix[, start[, end]]) -> int\n\
1855\n\
1856Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1857optional start, test S beginning at that position. With optional end, stop\n\
1858comparing S at that position.";
1859
1860static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001861string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001862{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001863 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865 const char* suffix;
1866 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867 int start = 0;
1868 int end = -1;
1869 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001871
Guido van Rossumc6821402000-05-08 14:08:05 +00001872 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1873 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 return NULL;
1875 if (PyString_Check(subobj)) {
1876 suffix = PyString_AS_STRING(subobj);
1877 slen = PyString_GET_SIZE(subobj);
1878 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001879 else if (PyUnicode_Check(subobj)) {
1880 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001881 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001882 subobj, start, end, +1);
1883 if (rc == -1)
1884 return NULL;
1885 else
1886 return PyInt_FromLong((long) rc);
1887 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001888 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889 return NULL;
1890
Guido van Rossum4c08d552000-03-10 22:55:18 +00001891 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 return PyInt_FromLong(0);
1893
1894 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001895 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896
Guido van Rossum4c08d552000-03-10 22:55:18 +00001897 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898 return PyInt_FromLong(1);
1899 else return PyInt_FromLong(0);
1900}
1901
1902
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001903static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001904"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001905\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001906Encodes S using the codec registered for encoding. encoding defaults\n\
1907to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001908handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1909a ValueError. Other possible values are 'ignore' and 'replace'.";
1910
1911static PyObject *
1912string_encode(PyStringObject *self, PyObject *args)
1913{
1914 char *encoding = NULL;
1915 char *errors = NULL;
1916 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1917 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001918 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
1919}
1920
1921
1922static char decode__doc__[] =
1923"S.decode([encoding[,errors]]) -> object\n\
1924\n\
1925Decodes S using the codec registered for encoding. encoding defaults\n\
1926to the default encoding. errors may be given to set a different error\n\
1927handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1928a ValueError. Other possible values are 'ignore' and 'replace'.";
1929
1930static PyObject *
1931string_decode(PyStringObject *self, PyObject *args)
1932{
1933 char *encoding = NULL;
1934 char *errors = NULL;
1935 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
1936 return NULL;
1937 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001938}
1939
1940
Guido van Rossum4c08d552000-03-10 22:55:18 +00001941static char expandtabs__doc__[] =
1942"S.expandtabs([tabsize]) -> string\n\
1943\n\
1944Return a copy of S where all tab characters are expanded using spaces.\n\
1945If tabsize is not given, a tab size of 8 characters is assumed.";
1946
1947static PyObject*
1948string_expandtabs(PyStringObject *self, PyObject *args)
1949{
1950 const char *e, *p;
1951 char *q;
1952 int i, j;
1953 PyObject *u;
1954 int tabsize = 8;
1955
1956 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1957 return NULL;
1958
Thomas Wouters7e474022000-07-16 12:04:32 +00001959 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001960 i = j = 0;
1961 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1962 for (p = PyString_AS_STRING(self); p < e; p++)
1963 if (*p == '\t') {
1964 if (tabsize > 0)
1965 j += tabsize - (j % tabsize);
1966 }
1967 else {
1968 j++;
1969 if (*p == '\n' || *p == '\r') {
1970 i += j;
1971 j = 0;
1972 }
1973 }
1974
1975 /* Second pass: create output string and fill it */
1976 u = PyString_FromStringAndSize(NULL, i + j);
1977 if (!u)
1978 return NULL;
1979
1980 j = 0;
1981 q = PyString_AS_STRING(u);
1982
1983 for (p = PyString_AS_STRING(self); p < e; p++)
1984 if (*p == '\t') {
1985 if (tabsize > 0) {
1986 i = tabsize - (j % tabsize);
1987 j += i;
1988 while (i--)
1989 *q++ = ' ';
1990 }
1991 }
1992 else {
1993 j++;
1994 *q++ = *p;
1995 if (*p == '\n' || *p == '\r')
1996 j = 0;
1997 }
1998
1999 return u;
2000}
2001
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002002static
2003PyObject *pad(PyStringObject *self,
2004 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002005 int right,
2006 char fill)
2007{
2008 PyObject *u;
2009
2010 if (left < 0)
2011 left = 0;
2012 if (right < 0)
2013 right = 0;
2014
2015 if (left == 0 && right == 0) {
2016 Py_INCREF(self);
2017 return (PyObject *)self;
2018 }
2019
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002020 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002021 left + PyString_GET_SIZE(self) + right);
2022 if (u) {
2023 if (left)
2024 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002025 memcpy(PyString_AS_STRING(u) + left,
2026 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002027 PyString_GET_SIZE(self));
2028 if (right)
2029 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2030 fill, right);
2031 }
2032
2033 return u;
2034}
2035
2036static char ljust__doc__[] =
2037"S.ljust(width) -> string\n\
2038\n\
2039Return S left justified in a string of length width. Padding is\n\
2040done using spaces.";
2041
2042static PyObject *
2043string_ljust(PyStringObject *self, PyObject *args)
2044{
2045 int width;
2046 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2047 return NULL;
2048
2049 if (PyString_GET_SIZE(self) >= width) {
2050 Py_INCREF(self);
2051 return (PyObject*) self;
2052 }
2053
2054 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2055}
2056
2057
2058static char rjust__doc__[] =
2059"S.rjust(width) -> string\n\
2060\n\
2061Return S right justified in a string of length width. Padding is\n\
2062done using spaces.";
2063
2064static PyObject *
2065string_rjust(PyStringObject *self, PyObject *args)
2066{
2067 int width;
2068 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2069 return NULL;
2070
2071 if (PyString_GET_SIZE(self) >= width) {
2072 Py_INCREF(self);
2073 return (PyObject*) self;
2074 }
2075
2076 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2077}
2078
2079
2080static char center__doc__[] =
2081"S.center(width) -> string\n\
2082\n\
2083Return S centered in a string of length width. Padding is done\n\
2084using spaces.";
2085
2086static PyObject *
2087string_center(PyStringObject *self, PyObject *args)
2088{
2089 int marg, left;
2090 int width;
2091
2092 if (!PyArg_ParseTuple(args, "i:center", &width))
2093 return NULL;
2094
2095 if (PyString_GET_SIZE(self) >= width) {
2096 Py_INCREF(self);
2097 return (PyObject*) self;
2098 }
2099
2100 marg = width - PyString_GET_SIZE(self);
2101 left = marg / 2 + (marg & width & 1);
2102
2103 return pad(self, left, marg - left, ' ');
2104}
2105
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implementation of S.zfill() (currently compiled out): left-pad with
   '0' up to width, keeping a leading '+' or '-' in front of the
   zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int nzeros;
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (PyString_GET_SIZE(self) >= width) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	nzeros = width - PyString_GET_SIZE(self);

	padded = pad(self, nzeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[nzeros] is the first byte of the original text */
	buf = PyString_AS_STRING(padded);
	if (buf[nzeros] == '+' || buf[nzeros] == '-') {
		/* move sign to beginning of string */
		buf[0] = buf[nzeros];
		buf[nzeros] = '0';
	}

	return padded;
}
#endif
2145
2146static char isspace__doc__[] =
2147"S.isspace() -> int\n\
2148\n\
2149Return 1 if there are only whitespace characters in S,\n\
21500 otherwise.";
2151
2152static PyObject*
2153string_isspace(PyStringObject *self, PyObject *args)
2154{
Fred Drakeba096332000-07-09 07:04:36 +00002155 register const unsigned char *p
2156 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002157 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158
2159 if (!PyArg_NoArgs(args))
2160 return NULL;
2161
2162 /* Shortcut for single character strings */
2163 if (PyString_GET_SIZE(self) == 1 &&
2164 isspace(*p))
2165 return PyInt_FromLong(1);
2166
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002167 /* Special case for empty strings */
2168 if (PyString_GET_SIZE(self) == 0)
2169 return PyInt_FromLong(0);
2170
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 e = p + PyString_GET_SIZE(self);
2172 for (; p < e; p++) {
2173 if (!isspace(*p))
2174 return PyInt_FromLong(0);
2175 }
2176 return PyInt_FromLong(1);
2177}
2178
2179
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002180static char isalpha__doc__[] =
2181"S.isalpha() -> int\n\
2182\n\
2183Return 1 if all characters in S are alphabetic\n\
2184and there is at least one character in S, 0 otherwise.";
2185
2186static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002187string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002188{
Fred Drakeba096332000-07-09 07:04:36 +00002189 register const unsigned char *p
2190 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002191 register const unsigned char *e;
2192
2193 if (!PyArg_NoArgs(args))
2194 return NULL;
2195
2196 /* Shortcut for single character strings */
2197 if (PyString_GET_SIZE(self) == 1 &&
2198 isalpha(*p))
2199 return PyInt_FromLong(1);
2200
2201 /* Special case for empty strings */
2202 if (PyString_GET_SIZE(self) == 0)
2203 return PyInt_FromLong(0);
2204
2205 e = p + PyString_GET_SIZE(self);
2206 for (; p < e; p++) {
2207 if (!isalpha(*p))
2208 return PyInt_FromLong(0);
2209 }
2210 return PyInt_FromLong(1);
2211}
2212
2213
2214static char isalnum__doc__[] =
2215"S.isalnum() -> int\n\
2216\n\
2217Return 1 if all characters in S are alphanumeric\n\
2218and there is at least one character in S, 0 otherwise.";
2219
2220static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002221string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002222{
Fred Drakeba096332000-07-09 07:04:36 +00002223 register const unsigned char *p
2224 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002225 register const unsigned char *e;
2226
2227 if (!PyArg_NoArgs(args))
2228 return NULL;
2229
2230 /* Shortcut for single character strings */
2231 if (PyString_GET_SIZE(self) == 1 &&
2232 isalnum(*p))
2233 return PyInt_FromLong(1);
2234
2235 /* Special case for empty strings */
2236 if (PyString_GET_SIZE(self) == 0)
2237 return PyInt_FromLong(0);
2238
2239 e = p + PyString_GET_SIZE(self);
2240 for (; p < e; p++) {
2241 if (!isalnum(*p))
2242 return PyInt_FromLong(0);
2243 }
2244 return PyInt_FromLong(1);
2245}
2246
2247
Guido van Rossum4c08d552000-03-10 22:55:18 +00002248static char isdigit__doc__[] =
2249"S.isdigit() -> int\n\
2250\n\
2251Return 1 if there are only digit characters in S,\n\
22520 otherwise.";
2253
2254static PyObject*
2255string_isdigit(PyStringObject *self, PyObject *args)
2256{
Fred Drakeba096332000-07-09 07:04:36 +00002257 register const unsigned char *p
2258 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002259 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002260
2261 if (!PyArg_NoArgs(args))
2262 return NULL;
2263
2264 /* Shortcut for single character strings */
2265 if (PyString_GET_SIZE(self) == 1 &&
2266 isdigit(*p))
2267 return PyInt_FromLong(1);
2268
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002269 /* Special case for empty strings */
2270 if (PyString_GET_SIZE(self) == 0)
2271 return PyInt_FromLong(0);
2272
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273 e = p + PyString_GET_SIZE(self);
2274 for (; p < e; p++) {
2275 if (!isdigit(*p))
2276 return PyInt_FromLong(0);
2277 }
2278 return PyInt_FromLong(1);
2279}
2280
2281
2282static char islower__doc__[] =
2283"S.islower() -> int\n\
2284\n\
2285Return 1 if all cased characters in S are lowercase and there is\n\
2286at least one cased character in S, 0 otherwise.";
2287
2288static PyObject*
2289string_islower(PyStringObject *self, PyObject *args)
2290{
Fred Drakeba096332000-07-09 07:04:36 +00002291 register const unsigned char *p
2292 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002293 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294 int cased;
2295
2296 if (!PyArg_NoArgs(args))
2297 return NULL;
2298
2299 /* Shortcut for single character strings */
2300 if (PyString_GET_SIZE(self) == 1)
2301 return PyInt_FromLong(islower(*p) != 0);
2302
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002303 /* Special case for empty strings */
2304 if (PyString_GET_SIZE(self) == 0)
2305 return PyInt_FromLong(0);
2306
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 e = p + PyString_GET_SIZE(self);
2308 cased = 0;
2309 for (; p < e; p++) {
2310 if (isupper(*p))
2311 return PyInt_FromLong(0);
2312 else if (!cased && islower(*p))
2313 cased = 1;
2314 }
2315 return PyInt_FromLong(cased);
2316}
2317
2318
2319static char isupper__doc__[] =
2320"S.isupper() -> int\n\
2321\n\
2322Return 1 if all cased characters in S are uppercase and there is\n\
2323at least one cased character in S, 0 otherwise.";
2324
2325static PyObject*
2326string_isupper(PyStringObject *self, PyObject *args)
2327{
Fred Drakeba096332000-07-09 07:04:36 +00002328 register const unsigned char *p
2329 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002330 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331 int cased;
2332
2333 if (!PyArg_NoArgs(args))
2334 return NULL;
2335
2336 /* Shortcut for single character strings */
2337 if (PyString_GET_SIZE(self) == 1)
2338 return PyInt_FromLong(isupper(*p) != 0);
2339
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002340 /* Special case for empty strings */
2341 if (PyString_GET_SIZE(self) == 0)
2342 return PyInt_FromLong(0);
2343
Guido van Rossum4c08d552000-03-10 22:55:18 +00002344 e = p + PyString_GET_SIZE(self);
2345 cased = 0;
2346 for (; p < e; p++) {
2347 if (islower(*p))
2348 return PyInt_FromLong(0);
2349 else if (!cased && isupper(*p))
2350 cased = 1;
2351 }
2352 return PyInt_FromLong(cased);
2353}
2354
2355
2356static char istitle__doc__[] =
2357"S.istitle() -> int\n\
2358\n\
2359Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2360may only follow uncased characters and lowercase characters only cased\n\
2361ones. Return 0 otherwise.";
2362
2363static PyObject*
2364string_istitle(PyStringObject *self, PyObject *args)
2365{
Fred Drakeba096332000-07-09 07:04:36 +00002366 register const unsigned char *p
2367 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002368 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 int cased, previous_is_cased;
2370
2371 if (!PyArg_NoArgs(args))
2372 return NULL;
2373
2374 /* Shortcut for single character strings */
2375 if (PyString_GET_SIZE(self) == 1)
2376 return PyInt_FromLong(isupper(*p) != 0);
2377
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002378 /* Special case for empty strings */
2379 if (PyString_GET_SIZE(self) == 0)
2380 return PyInt_FromLong(0);
2381
Guido van Rossum4c08d552000-03-10 22:55:18 +00002382 e = p + PyString_GET_SIZE(self);
2383 cased = 0;
2384 previous_is_cased = 0;
2385 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002386 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387
2388 if (isupper(ch)) {
2389 if (previous_is_cased)
2390 return PyInt_FromLong(0);
2391 previous_is_cased = 1;
2392 cased = 1;
2393 }
2394 else if (islower(ch)) {
2395 if (!previous_is_cased)
2396 return PyInt_FromLong(0);
2397 previous_is_cased = 1;
2398 cased = 1;
2399 }
2400 else
2401 previous_is_cased = 0;
2402 }
2403 return PyInt_FromLong(cased);
2404}
2405
2406
2407static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002408"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002409\n\
2410Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002411Line breaks are not included in the resulting list unless keepends\n\
2412is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002413
2414#define SPLIT_APPEND(data, left, right) \
2415 str = PyString_FromStringAndSize(data + left, right - left); \
2416 if (!str) \
2417 goto onError; \
2418 if (PyList_Append(list, str)) { \
2419 Py_DECREF(str); \
2420 goto onError; \
2421 } \
2422 else \
2423 Py_DECREF(str);
2424
2425static PyObject*
2426string_splitlines(PyStringObject *self, PyObject *args)
2427{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428 register int i;
2429 register int j;
2430 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002431 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432 PyObject *list;
2433 PyObject *str;
2434 char *data;
2435
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002436 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002437 return NULL;
2438
2439 data = PyString_AS_STRING(self);
2440 len = PyString_GET_SIZE(self);
2441
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 list = PyList_New(0);
2443 if (!list)
2444 goto onError;
2445
2446 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002447 int eol;
2448
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449 /* Find a line and append it */
2450 while (i < len && data[i] != '\n' && data[i] != '\r')
2451 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002452
2453 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002454 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002455 if (i < len) {
2456 if (data[i] == '\r' && i + 1 < len &&
2457 data[i+1] == '\n')
2458 i += 2;
2459 else
2460 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002461 if (keepends)
2462 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002463 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002464 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 j = i;
2466 }
2467 if (j < len) {
2468 SPLIT_APPEND(data, j, len);
2469 }
2470
2471 return list;
2472
2473 onError:
2474 Py_DECREF(list);
2475 return NULL;
2476}
2477
2478#undef SPLIT_APPEND
2479
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002481static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002482string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002483 /* Counterparts of the obsolete stropmodule functions; except
2484 string.maketrans(). */
2485 {"join", (PyCFunction)string_join, 1, join__doc__},
2486 {"split", (PyCFunction)string_split, 1, split__doc__},
2487 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2488 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2489 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2490 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2491 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2492 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2493 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002494 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2495 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002496 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2497 {"count", (PyCFunction)string_count, 1, count__doc__},
2498 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2499 {"find", (PyCFunction)string_find, 1, find__doc__},
2500 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002502 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2503 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2504 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2505 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2507 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2508 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002509 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2510 {"title", (PyCFunction)string_title, 1, title__doc__},
2511 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2512 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2513 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002514 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002515 {"decode", (PyCFunction)string_decode, 1, decode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002516 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2517 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2518#if 0
2519 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2520#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002521 {NULL, NULL} /* sentinel */
2522};
2523
2524static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002525string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526{
2527 return Py_FindMethod(string_methods, (PyObject*)s, name);
2528}
2529
2530
/* Type object for strings.  The initializer is positional, so every
   entry must stay in the slot order of PyTypeObject; a 0 means the slot
   is not provided by this type. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/*ob_size*/
	"string",				/*tp_name*/
	sizeof(PyStringObject),			/*tp_basicsize*/
	sizeof(char),				/*tp_itemsize*/
	(destructor)string_dealloc,		/*tp_dealloc*/
	(printfunc)string_print,		/*tp_print*/
	(getattrfunc)string_getattr,		/*tp_getattr*/
	0,					/*tp_setattr*/
	0,					/*tp_compare*/
	(reprfunc)string_repr,			/*tp_repr*/
	0,					/*tp_as_number*/
	&string_as_sequence,			/*tp_as_sequence*/
	0,					/*tp_as_mapping*/
	(hashfunc)string_hash,			/*tp_hash*/
	0,					/*tp_call*/
	(reprfunc)string_str,			/*tp_str*/
	0,					/*tp_getattro*/
	0,					/*tp_setattro*/
	&string_as_buffer,			/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT,			/*tp_flags*/
	0,					/*tp_doc*/
	0,					/*tp_traverse*/
	0,					/*tp_clear*/
	(richcmpfunc)string_richcompare,	/*tp_richcompare*/
	0,					/*tp_weaklistoffset*/
	0,					/*tp_iter*/
	0,					/*tp_iternext*/
};
2561
2562void
Fred Drakeba096332000-07-09 07:04:36 +00002563PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002564{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002565 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002566 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002567 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002568 if (w == NULL || !PyString_Check(*pv)) {
2569 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002570 *pv = NULL;
2571 return;
2572 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002573 v = string_concat((PyStringObject *) *pv, w);
2574 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002575 *pv = v;
2576}
2577
Guido van Rossum013142a1994-08-30 08:19:36 +00002578void
Fred Drakeba096332000-07-09 07:04:36 +00002579PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002580{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002581 PyString_Concat(pv, w);
2582 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002583}
2584
2585
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002586/* The following function breaks the notion that strings are immutable:
2587 it changes the size of a string. We get away with this only if there
2588 is only one module referencing the object. You can also think of it
2589 as creating a new string object and destroying the old one, only
2590 more efficiently. In any case, don't use this if the string may
2591 already be known to some other part of the code... */
2592
2593int
Fred Drakeba096332000-07-09 07:04:36 +00002594_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002595{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002596 register PyObject *v;
2597 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002598 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002599 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002600 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002601 Py_DECREF(v);
2602 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002603 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002604 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002605 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002606#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002607 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002608#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002609 _Py_ForgetReference(v);
2610 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002611 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002612 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002613 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002614 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002615 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002616 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002617 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002618 _Py_NewReference(*pv);
2619 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002620 sv->ob_size = newsize;
2621 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002622 return 0;
2623}
Guido van Rossume5372401993-03-16 12:15:04 +00002624
2625/* Helpers for formatstring */
2626
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002627static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002628getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002629{
2630 int argidx = *p_argidx;
2631 if (argidx < arglen) {
2632 (*p_argidx)++;
2633 if (arglen < 0)
2634 return args;
2635 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002636 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002637 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002638 PyErr_SetString(PyExc_TypeError,
2639 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002640 return NULL;
2641}
2642
/* Format flags.  Each bit records that the flag character shown next
 * to it was seen in a format specifier; the bits are OR-ed together
 * into a single 'flags' int.
 */
#define F_LJUST	0x01	/* '-' */
#define F_SIGN	0x02	/* '+' */
#define F_BLANK	0x04	/* ' ' */
#define F_ALT	0x08	/* '#' */
#define F_ZERO	0x10	/* '0' */
2655
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002656static int
Fred Drakeba096332000-07-09 07:04:36 +00002657formatfloat(char *buf, size_t buflen, int flags,
2658 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002659{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002660 /* fmt = '%#.' + `prec` + `type`
2661 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002662 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002663 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002664 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002665 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002666 if (prec < 0)
2667 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002668 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2669 type = 'g';
2670 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002671 /* worst case length calc to ensure no buffer overrun:
2672 fmt = %#.<prec>g
2673 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002674 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002675 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2676 If prec=0 the effective precision is 1 (the leading digit is
2677 always given), therefore increase by one to 10+prec. */
2678 if (buflen <= (size_t)10 + (size_t)prec) {
2679 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002680 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002681 return -1;
2682 }
Guido van Rossume5372401993-03-16 12:15:04 +00002683 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002684 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002685}
2686
Tim Peters38fd5b62000-09-21 05:43:11 +00002687/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2688 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2689 * Python's regular ints.
2690 * Return value: a new PyString*, or NULL if error.
2691 * . *pbuf is set to point into it,
2692 * *plen set to the # of chars following that.
2693 * Caller must decref it when done using pbuf.
2694 * The string starting at *pbuf is of the form
2695 * "-"? ("0x" | "0X")? digit+
2696 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002697 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002698 * There will be at least prec digits, zero-filled on the left if
2699 * necessary to get that many.
2700 * val object to be converted
2701 * flags bitmask of format flags; only F_ALT is looked at
2702 * prec minimum number of digits; 0-fill on left if needed
2703 * type a character in [duoxX]; u acts the same as d
2704 *
2705 * CAUTION: o, x and X conversions on regular ints can never
2706 * produce a '-' sign, but can for Python's unbounded ints.
2707 */
2708PyObject*
2709_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2710 char **pbuf, int *plen)
2711{
2712 PyObject *result = NULL;
2713 char *buf;
2714 int i;
2715 int sign; /* 1 if '-', else 0 */
2716 int len; /* number of characters */
2717 int numdigits; /* len == numnondigits + numdigits */
2718 int numnondigits = 0;
2719
2720 switch (type) {
2721 case 'd':
2722 case 'u':
2723 result = val->ob_type->tp_str(val);
2724 break;
2725 case 'o':
2726 result = val->ob_type->tp_as_number->nb_oct(val);
2727 break;
2728 case 'x':
2729 case 'X':
2730 numnondigits = 2;
2731 result = val->ob_type->tp_as_number->nb_hex(val);
2732 break;
2733 default:
2734 assert(!"'type' not in [duoxX]");
2735 }
2736 if (!result)
2737 return NULL;
2738
2739 /* To modify the string in-place, there can only be one reference. */
2740 if (result->ob_refcnt != 1) {
2741 PyErr_BadInternalCall();
2742 return NULL;
2743 }
2744 buf = PyString_AsString(result);
2745 len = PyString_Size(result);
2746 if (buf[len-1] == 'L') {
2747 --len;
2748 buf[len] = '\0';
2749 }
2750 sign = buf[0] == '-';
2751 numnondigits += sign;
2752 numdigits = len - numnondigits;
2753 assert(numdigits > 0);
2754
Tim Petersfff53252001-04-12 18:38:48 +00002755 /* Get rid of base marker unless F_ALT */
2756 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002757 /* Need to skip 0x, 0X or 0. */
2758 int skipped = 0;
2759 switch (type) {
2760 case 'o':
2761 assert(buf[sign] == '0');
2762 /* If 0 is only digit, leave it alone. */
2763 if (numdigits > 1) {
2764 skipped = 1;
2765 --numdigits;
2766 }
2767 break;
2768 case 'x':
2769 case 'X':
2770 assert(buf[sign] == '0');
2771 assert(buf[sign + 1] == 'x');
2772 skipped = 2;
2773 numnondigits -= 2;
2774 break;
2775 }
2776 if (skipped) {
2777 buf += skipped;
2778 len -= skipped;
2779 if (sign)
2780 buf[0] = '-';
2781 }
2782 assert(len == numnondigits + numdigits);
2783 assert(numdigits > 0);
2784 }
2785
2786 /* Fill with leading zeroes to meet minimum width. */
2787 if (prec > numdigits) {
2788 PyObject *r1 = PyString_FromStringAndSize(NULL,
2789 numnondigits + prec);
2790 char *b1;
2791 if (!r1) {
2792 Py_DECREF(result);
2793 return NULL;
2794 }
2795 b1 = PyString_AS_STRING(r1);
2796 for (i = 0; i < numnondigits; ++i)
2797 *b1++ = *buf++;
2798 for (i = 0; i < prec - numdigits; i++)
2799 *b1++ = '0';
2800 for (i = 0; i < numdigits; i++)
2801 *b1++ = *buf++;
2802 *b1 = '\0';
2803 Py_DECREF(result);
2804 result = r1;
2805 buf = PyString_AS_STRING(result);
2806 len = numnondigits + prec;
2807 }
2808
2809 /* Fix up case for hex conversions. */
2810 switch (type) {
2811 case 'x':
2812 /* Need to convert all upper case letters to lower case. */
2813 for (i = 0; i < len; i++)
2814 if (buf[i] >= 'A' && buf[i] <= 'F')
2815 buf[i] += 'a'-'A';
2816 break;
2817 case 'X':
2818 /* Need to convert 0x to 0X (and -0x to -0X). */
2819 if (buf[sign + 1] == 'x')
2820 buf[sign + 1] = 'X';
2821 break;
2822 }
2823 *pbuf = buf;
2824 *plen = len;
2825 return result;
2826}
2827
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002828static int
Fred Drakeba096332000-07-09 07:04:36 +00002829formatint(char *buf, size_t buflen, int flags,
2830 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002831{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002832 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002833 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2834 + 1 + 1 = 24 */
2835 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002836 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002837 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002838 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002839 if (prec < 0)
2840 prec = 1;
2841 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002842 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002843 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002844 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002845 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002846 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002847 return -1;
2848 }
Guido van Rossume5372401993-03-16 12:15:04 +00002849 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002850 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2851 * but we want it (for consistency with other %#x conversions, and
2852 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002853 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2854 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2855 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002856 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002857 if (x == 0 &&
2858 (flags & F_ALT) &&
2859 (type == 'x' || type == 'X') &&
2860 buf[1] != (char)type) /* this last always true under std C */
2861 {
Tim Petersfff53252001-04-12 18:38:48 +00002862 memmove(buf+2, buf, strlen(buf) + 1);
2863 buf[0] = '0';
2864 buf[1] = (char)type;
2865 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002866 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002867}
2868
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002869static int
Fred Drakeba096332000-07-09 07:04:36 +00002870formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002871{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002872 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002873 if (PyString_Check(v)) {
2874 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002875 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002876 }
2877 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002878 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002879 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002880 }
2881 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002882 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002883}
2884
Guido van Rossum013142a1994-08-30 08:19:36 +00002885
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that it behaves as a single
   operand wherever it is used (for instance after sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002895
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002896PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002897PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002898{
2899 char *fmt, *res;
2900 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002901 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002902 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002903 PyObject *dict = NULL;
2904 if (format == NULL || !PyString_Check(format) || args == NULL) {
2905 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002906 return NULL;
2907 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002908 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002909 fmt = PyString_AsString(format);
2910 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002911 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002912 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002913 if (result == NULL)
2914 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002915 res = PyString_AsString(result);
2916 if (PyTuple_Check(args)) {
2917 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002918 argidx = 0;
2919 }
2920 else {
2921 arglen = -1;
2922 argidx = -2;
2923 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002924 if (args->ob_type->tp_as_mapping)
2925 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002926 while (--fmtcnt >= 0) {
2927 if (*fmt != '%') {
2928 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002929 rescnt = fmtcnt + 100;
2930 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002931 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002932 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002933 res = PyString_AsString(result)
2934 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002935 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002936 }
2937 *res++ = *fmt++;
2938 }
2939 else {
2940 /* Got a format specifier */
2941 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002942 int width = -1;
2943 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00002944 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002945 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002946 PyObject *v = NULL;
2947 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002948 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002949 int sign;
2950 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002951 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002952 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002953 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002954
Guido van Rossumda9c2711996-12-05 21:58:58 +00002955 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002956 if (*fmt == '(') {
2957 char *keystart;
2958 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002959 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002960 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002961
2962 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002963 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002964 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002965 goto error;
2966 }
2967 ++fmt;
2968 --fmtcnt;
2969 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002970 /* Skip over balanced parentheses */
2971 while (pcount > 0 && --fmtcnt >= 0) {
2972 if (*fmt == ')')
2973 --pcount;
2974 else if (*fmt == '(')
2975 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002976 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002977 }
2978 keylen = fmt - keystart - 1;
2979 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002980 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002981 "incomplete format key");
2982 goto error;
2983 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002984 key = PyString_FromStringAndSize(keystart,
2985 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002986 if (key == NULL)
2987 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002988 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002989 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002990 args_owned = 0;
2991 }
2992 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002993 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002994 if (args == NULL) {
2995 goto error;
2996 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002997 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002998 arglen = -1;
2999 argidx = -2;
3000 }
Guido van Rossume5372401993-03-16 12:15:04 +00003001 while (--fmtcnt >= 0) {
3002 switch (c = *fmt++) {
3003 case '-': flags |= F_LJUST; continue;
3004 case '+': flags |= F_SIGN; continue;
3005 case ' ': flags |= F_BLANK; continue;
3006 case '#': flags |= F_ALT; continue;
3007 case '0': flags |= F_ZERO; continue;
3008 }
3009 break;
3010 }
3011 if (c == '*') {
3012 v = getnextarg(args, arglen, &argidx);
3013 if (v == NULL)
3014 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003015 if (!PyInt_Check(v)) {
3016 PyErr_SetString(PyExc_TypeError,
3017 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003018 goto error;
3019 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003020 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003021 if (width < 0) {
3022 flags |= F_LJUST;
3023 width = -width;
3024 }
Guido van Rossume5372401993-03-16 12:15:04 +00003025 if (--fmtcnt >= 0)
3026 c = *fmt++;
3027 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003028 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003029 width = c - '0';
3030 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003031 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003032 if (!isdigit(c))
3033 break;
3034 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003035 PyErr_SetString(
3036 PyExc_ValueError,
3037 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003038 goto error;
3039 }
3040 width = width*10 + (c - '0');
3041 }
3042 }
3043 if (c == '.') {
3044 prec = 0;
3045 if (--fmtcnt >= 0)
3046 c = *fmt++;
3047 if (c == '*') {
3048 v = getnextarg(args, arglen, &argidx);
3049 if (v == NULL)
3050 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003051 if (!PyInt_Check(v)) {
3052 PyErr_SetString(
3053 PyExc_TypeError,
3054 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003055 goto error;
3056 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003057 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003058 if (prec < 0)
3059 prec = 0;
3060 if (--fmtcnt >= 0)
3061 c = *fmt++;
3062 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003063 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003064 prec = c - '0';
3065 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003066 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003067 if (!isdigit(c))
3068 break;
3069 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003070 PyErr_SetString(
3071 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003072 "prec too big");
3073 goto error;
3074 }
3075 prec = prec*10 + (c - '0');
3076 }
3077 }
3078 } /* prec */
3079 if (fmtcnt >= 0) {
3080 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003081 if (--fmtcnt >= 0)
3082 c = *fmt++;
3083 }
3084 }
3085 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003086 PyErr_SetString(PyExc_ValueError,
3087 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003088 goto error;
3089 }
3090 if (c != '%') {
3091 v = getnextarg(args, arglen, &argidx);
3092 if (v == NULL)
3093 goto error;
3094 }
3095 sign = 0;
3096 fill = ' ';
3097 switch (c) {
3098 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003099 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003100 len = 1;
3101 break;
3102 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003103 case 'r':
3104 if (PyUnicode_Check(v)) {
3105 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003106 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003107 goto unicode;
3108 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003109 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003110 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003111 else
3112 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003113 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003114 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003115 if (!PyString_Check(temp)) {
3116 PyErr_SetString(PyExc_TypeError,
3117 "%s argument has non-string str()");
3118 goto error;
3119 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003120 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003121 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003122 if (prec >= 0 && len > prec)
3123 len = prec;
3124 break;
3125 case 'i':
3126 case 'd':
3127 case 'u':
3128 case 'o':
3129 case 'x':
3130 case 'X':
3131 if (c == 'i')
3132 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003133 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003134 temp = _PyString_FormatLong(v, flags,
3135 prec, c, &pbuf, &len);
3136 if (!temp)
3137 goto error;
3138 /* unbounded ints can always produce
3139 a sign character! */
3140 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003141 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003142 else {
3143 pbuf = formatbuf;
3144 len = formatint(pbuf, sizeof(formatbuf),
3145 flags, prec, c, v);
3146 if (len < 0)
3147 goto error;
3148 /* only d conversion is signed */
3149 sign = c == 'd';
3150 }
3151 if (flags & F_ZERO)
3152 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003153 break;
3154 case 'e':
3155 case 'E':
3156 case 'f':
3157 case 'g':
3158 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003159 pbuf = formatbuf;
3160 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003161 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003162 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003163 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003164 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003165 fill = '0';
3166 break;
3167 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003168 pbuf = formatbuf;
3169 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003170 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003171 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003172 break;
3173 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003174 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003175 "unsupported format character '%c' (0x%x) "
3176 "at index %i",
3177 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003178 goto error;
3179 }
3180 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003181 if (*pbuf == '-' || *pbuf == '+') {
3182 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003183 len--;
3184 }
3185 else if (flags & F_SIGN)
3186 sign = '+';
3187 else if (flags & F_BLANK)
3188 sign = ' ';
3189 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003190 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003191 }
3192 if (width < len)
3193 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003194 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003195 reslen -= rescnt;
3196 rescnt = width + fmtcnt + 100;
3197 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003198 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003199 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003200 res = PyString_AsString(result)
3201 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003202 }
3203 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003204 if (fill != ' ')
3205 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003206 rescnt--;
3207 if (width > len)
3208 width--;
3209 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003210 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3211 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003212 assert(pbuf[1] == c);
3213 if (fill != ' ') {
3214 *res++ = *pbuf++;
3215 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003216 }
Tim Petersfff53252001-04-12 18:38:48 +00003217 rescnt -= 2;
3218 width -= 2;
3219 if (width < 0)
3220 width = 0;
3221 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003222 }
3223 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003224 do {
3225 --rescnt;
3226 *res++ = fill;
3227 } while (--width > len);
3228 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003229 if (fill == ' ') {
3230 if (sign)
3231 *res++ = sign;
3232 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003233 (c == 'x' || c == 'X')) {
3234 assert(pbuf[0] == '0');
3235 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003236 *res++ = *pbuf++;
3237 *res++ = *pbuf++;
3238 }
3239 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003240 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003241 res += len;
3242 rescnt -= len;
3243 while (--width >= len) {
3244 --rescnt;
3245 *res++ = ' ';
3246 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003247 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003248 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003249 "not all arguments converted");
3250 goto error;
3251 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003252 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003253 } /* '%' */
3254 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003255 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003256 PyErr_SetString(PyExc_TypeError,
3257 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003258 goto error;
3259 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003260 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003261 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003262 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003263 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003264 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003265
3266 unicode:
3267 if (args_owned) {
3268 Py_DECREF(args);
3269 args_owned = 0;
3270 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003271 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003272 if (PyTuple_Check(orig_args) && argidx > 0) {
3273 PyObject *v;
3274 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3275 v = PyTuple_New(n);
3276 if (v == NULL)
3277 goto error;
3278 while (--n >= 0) {
3279 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3280 Py_INCREF(w);
3281 PyTuple_SET_ITEM(v, n, w);
3282 }
3283 args = v;
3284 } else {
3285 Py_INCREF(orig_args);
3286 args = orig_args;
3287 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003288 args_owned = 1;
3289 /* Take what we have of the result and let the Unicode formatting
3290 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003291 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003292 if (_PyString_Resize(&result, rescnt))
3293 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003294 fmtcnt = PyString_GET_SIZE(format) - \
3295 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003296 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3297 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003298 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003299 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003300 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003301 if (v == NULL)
3302 goto error;
3303 /* Paste what we have (result) to what the Unicode formatting
3304 function returned (v) and return the result (or error) */
3305 w = PyUnicode_Concat(result, v);
3306 Py_DECREF(result);
3307 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003308 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003309 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003310
Guido van Rossume5372401993-03-16 12:15:04 +00003311 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003312 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003313 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003314 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003315 }
Guido van Rossume5372401993-03-16 12:15:04 +00003316 return NULL;
3317}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003318
3319
3320#ifdef INTERN_STRINGS
3321
Barry Warsaw4df762f2000-08-16 23:41:01 +00003322/* This dictionary will leak at PyString_Fini() time. That's acceptable
3323 * because PyString_Fini() specifically frees interned strings that are
3324 * only referenced by this dictionary. The CVS log entry for revision 2.45
3325 * says:
3326 *
3327 * Change the Fini function to only remove otherwise unreferenced
3328 * strings from the interned table. There are references in
3329 * hard-to-find static variables all over the interpreter, and it's not
3330 * worth trying to get rid of all those; but "uninterning" isn't fair
3331 * either and may cause subtle failures later -- so we have to keep them
3332 * in the interned table.
3333 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003334static PyObject *interned;
3335
3336void
Fred Drakeba096332000-07-09 07:04:36 +00003337PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003338{
3339 register PyStringObject *s = (PyStringObject *)(*p);
3340 PyObject *t;
3341 if (s == NULL || !PyString_Check(s))
3342 Py_FatalError("PyString_InternInPlace: strings only please!");
3343 if ((t = s->ob_sinterned) != NULL) {
3344 if (t == (PyObject *)s)
3345 return;
3346 Py_INCREF(t);
3347 *p = t;
3348 Py_DECREF(s);
3349 return;
3350 }
3351 if (interned == NULL) {
3352 interned = PyDict_New();
3353 if (interned == NULL)
3354 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003355 }
3356 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3357 Py_INCREF(t);
3358 *p = s->ob_sinterned = t;
3359 Py_DECREF(s);
3360 return;
3361 }
3362 t = (PyObject *)s;
3363 if (PyDict_SetItem(interned, t, t) == 0) {
3364 s->ob_sinterned = t;
3365 return;
3366 }
3367 PyErr_Clear();
3368}
3369
3370
3371PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003372PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003373{
3374 PyObject *s = PyString_FromString(cp);
3375 if (s == NULL)
3376 return NULL;
3377 PyString_InternInPlace(&s);
3378 return s;
3379}
3380
3381#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003382
3383void
Fred Drakeba096332000-07-09 07:04:36 +00003384PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003385{
3386 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003387 for (i = 0; i < UCHAR_MAX + 1; i++) {
3388 Py_XDECREF(characters[i]);
3389 characters[i] = NULL;
3390 }
3391#ifndef DONT_SHARE_SHORT_STRINGS
3392 Py_XDECREF(nullstring);
3393 nullstring = NULL;
3394#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003395#ifdef INTERN_STRINGS
3396 if (interned) {
3397 int pos, changed;
3398 PyObject *key, *value;
3399 do {
3400 changed = 0;
3401 pos = 0;
3402 while (PyDict_Next(interned, &pos, &key, &value)) {
3403 if (key->ob_refcnt == 2 && key == value) {
3404 PyDict_DelItem(interned, key);
3405 changed = 1;
3406 }
3407 }
3408 } while (changed);
3409 }
3410#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003411}
Barry Warsawa903ad982001-02-23 16:40:48 +00003412
3413#ifdef INTERN_STRINGS
3414void _Py_ReleaseInternedStrings(void)
3415{
3416 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003417 fprintf(stderr, "releasing interned strings\n");
3418 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003419 Py_DECREF(interned);
3420 interned = NULL;
3421 }
3422}
3423#endif /* INTERN_STRINGS */