blob: 3d12588d2c6de2cde40b18f30979e0b450137225 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   Newsizedstringobject() and newstringobject() try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   newstringobject() this is always the case, for
   newsizedstringobject() this is the case when the first argument is
   not NULL.
   A common practice to allocate a string and then fill it in or
   change it must be done carefully.  It is only allowed to change the
   contents of the string if the object was gotten from
   newsizedstringobject() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000150PyObject *PyString_Decode(const char *s,
151 int size,
152 const char *encoding,
153 const char *errors)
154{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000155 PyObject *v, *str;
156
157 str = PyString_FromStringAndSize(s, size);
158 if (str == NULL)
159 return NULL;
160 v = PyString_AsDecodedString(str, encoding, errors);
161 Py_DECREF(str);
162 return v;
163}
164
165PyObject *PyString_AsDecodedObject(PyObject *str,
166 const char *encoding,
167 const char *errors)
168{
169 PyObject *v;
170
171 if (!PyString_Check(str)) {
172 PyErr_BadArgument();
173 goto onError;
174 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000175
176 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000177 encoding = PyUnicode_GetDefaultEncoding();
178
179 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000180 v = PyCodec_Decode(str, encoding, errors);
181 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000182 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000183
184 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000185
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000186 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000187 return NULL;
188}
189
190PyObject *PyString_AsDecodedString(PyObject *str,
191 const char *encoding,
192 const char *errors)
193{
194 PyObject *v;
195
196 v = PyString_AsDecodedObject(str, encoding, errors);
197 if (v == NULL)
198 goto onError;
199
200 /* Convert Unicode to a string using the default encoding */
201 if (PyUnicode_Check(v)) {
202 PyObject *temp = v;
203 v = PyUnicode_AsEncodedString(v, NULL, NULL);
204 Py_DECREF(temp);
205 if (v == NULL)
206 goto onError;
207 }
208 if (!PyString_Check(v)) {
209 PyErr_Format(PyExc_TypeError,
210 "decoder did not return a string object (type=%.400s)",
211 v->ob_type->tp_name);
212 Py_DECREF(v);
213 goto onError;
214 }
215
216 return v;
217
218 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000219 return NULL;
220}
221
222PyObject *PyString_Encode(const char *s,
223 int size,
224 const char *encoding,
225 const char *errors)
226{
227 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000228
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000229 str = PyString_FromStringAndSize(s, size);
230 if (str == NULL)
231 return NULL;
232 v = PyString_AsEncodedString(str, encoding, errors);
233 Py_DECREF(str);
234 return v;
235}
236
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000237PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000238 const char *encoding,
239 const char *errors)
240{
241 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000242
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000243 if (!PyString_Check(str)) {
244 PyErr_BadArgument();
245 goto onError;
246 }
247
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000248 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000249 encoding = PyUnicode_GetDefaultEncoding();
250
251 /* Encode via the codec registry */
252 v = PyCodec_Encode(str, encoding, errors);
253 if (v == NULL)
254 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000255
256 return v;
257
258 onError:
259 return NULL;
260}
261
262PyObject *PyString_AsEncodedString(PyObject *str,
263 const char *encoding,
264 const char *errors)
265{
266 PyObject *v;
267
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000268 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000269 if (v == NULL)
270 goto onError;
271
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000272 /* Convert Unicode to a string using the default encoding */
273 if (PyUnicode_Check(v)) {
274 PyObject *temp = v;
275 v = PyUnicode_AsEncodedString(v, NULL, NULL);
276 Py_DECREF(temp);
277 if (v == NULL)
278 goto onError;
279 }
280 if (!PyString_Check(v)) {
281 PyErr_Format(PyExc_TypeError,
282 "encoder did not return a string object (type=%.400s)",
283 v->ob_type->tp_name);
284 Py_DECREF(v);
285 goto onError;
286 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000287
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000288 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000289
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000290 onError:
291 return NULL;
292}
293
Guido van Rossum234f9421993-06-17 12:35:49 +0000294static void
Fred Drakeba096332000-07-09 07:04:36 +0000295string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000296{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000297 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000298}
299
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000300static int
301string_getsize(register PyObject *op)
302{
303 char *s;
304 int len;
305 if (PyString_AsStringAndSize(op, &s, &len))
306 return -1;
307 return len;
308}
309
310static /*const*/ char *
311string_getbuffer(register PyObject *op)
312{
313 char *s;
314 int len;
315 if (PyString_AsStringAndSize(op, &s, &len))
316 return NULL;
317 return s;
318}
319
Guido van Rossumd7047b31995-01-02 19:07:15 +0000320int
Fred Drakeba096332000-07-09 07:04:36 +0000321PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000323 if (!PyString_Check(op))
324 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326}
327
328/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000329PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000331 if (!PyString_Check(op))
332 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000333 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334}
335
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000336int
337PyString_AsStringAndSize(register PyObject *obj,
338 register char **s,
339 register int *len)
340{
341 if (s == NULL) {
342 PyErr_BadInternalCall();
343 return -1;
344 }
345
346 if (!PyString_Check(obj)) {
347 if (PyUnicode_Check(obj)) {
348 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
349 if (obj == NULL)
350 return -1;
351 }
352 else {
353 PyErr_Format(PyExc_TypeError,
354 "expected string or Unicode object, "
355 "%.200s found", obj->ob_type->tp_name);
356 return -1;
357 }
358 }
359
360 *s = PyString_AS_STRING(obj);
361 if (len != NULL)
362 *len = PyString_GET_SIZE(obj);
363 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
364 PyErr_SetString(PyExc_TypeError,
365 "expected string without null bytes");
366 return -1;
367 }
368 return 0;
369}
370
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000371/* Methods */
372
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000373static int
Fred Drakeba096332000-07-09 07:04:36 +0000374string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000375{
376 int i;
377 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000378 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000379 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000380 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000381 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000382 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000383 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000384
Thomas Wouters7e474022000-07-16 12:04:32 +0000385 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000386 quote = '\'';
387 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
388 quote = '"';
389
390 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391 for (i = 0; i < op->ob_size; i++) {
392 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000393 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000394 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000395 else if (c == '\t')
396 fprintf(fp, "\\t");
397 else if (c == '\n')
398 fprintf(fp, "\\n");
399 else if (c == '\r')
400 fprintf(fp, "\\r");
401 else if (c < ' ' || c >= 0x7f)
402 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000403 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000404 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000405 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000406 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000407 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000408}
409
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000410static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000411string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000412{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000413 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
414 PyObject *v;
415 if (newsize > INT_MAX) {
416 PyErr_SetString(PyExc_OverflowError,
417 "string is too large to make repr");
418 }
419 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000420 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000421 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000422 }
423 else {
424 register int i;
425 register char c;
426 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000427 int quote;
428
Thomas Wouters7e474022000-07-16 12:04:32 +0000429 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000430 quote = '\'';
431 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
432 quote = '"';
433
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000434 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000435 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000436 for (i = 0; i < op->ob_size; i++) {
437 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000438 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000440 else if (c == '\t')
441 *p++ = '\\', *p++ = 't';
442 else if (c == '\n')
443 *p++ = '\\', *p++ = 'n';
444 else if (c == '\r')
445 *p++ = '\\', *p++ = 'r';
446 else if (c < ' ' || c >= 0x7f) {
447 sprintf(p, "\\x%02x", c & 0xff);
448 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000449 }
450 else
451 *p++ = c;
452 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000453 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000455 _PyString_Resize(
456 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000457 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000458 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459}
460
Guido van Rossum189f1df2001-05-01 16:51:53 +0000461static PyObject *
462string_str(PyObject *s)
463{
464 Py_INCREF(s);
465 return s;
466}
467
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000468static int
Fred Drakeba096332000-07-09 07:04:36 +0000469string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000470{
471 return a->ob_size;
472}
473
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000474static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000475string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000476{
477 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000478 register PyStringObject *op;
479 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000480 if (PyUnicode_Check(bb))
481 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000482 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000483 "cannot add type \"%.200s\" to string",
484 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000485 return NULL;
486 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000487#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000488 /* Optimize cases with empty left or right operand */
489 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000490 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000491 return bb;
492 }
493 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000494 Py_INCREF(a);
495 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000496 }
497 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000498 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000499 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000500 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000501 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000502 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000503 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000504#ifdef CACHE_HASH
505 op->ob_shash = -1;
506#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000507#ifdef INTERN_STRINGS
508 op->ob_sinterned = NULL;
509#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000510 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
511 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
512 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000513 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000514#undef b
515}
516
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000517static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000518string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519{
520 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000521 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000522 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000523 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524 if (n < 0)
525 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000526 /* watch out for overflows: the size can overflow int,
527 * and the # of bytes needed can overflow size_t
528 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000530 if (n && size / n != a->ob_size) {
531 PyErr_SetString(PyExc_OverflowError,
532 "repeated string is too long");
533 return NULL;
534 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000535 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000536 Py_INCREF(a);
537 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000538 }
Tim Peters8f422462000-09-09 06:13:41 +0000539 nbytes = size * sizeof(char);
540 if (nbytes / sizeof(char) != (size_t)size ||
541 nbytes + sizeof(PyStringObject) <= nbytes) {
542 PyErr_SetString(PyExc_OverflowError,
543 "repeated string is too long");
544 return NULL;
545 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000546 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000547 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000548 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000549 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000550 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000551#ifdef CACHE_HASH
552 op->ob_shash = -1;
553#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000554#ifdef INTERN_STRINGS
555 op->ob_sinterned = NULL;
556#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000557 for (i = 0; i < size; i += a->ob_size)
558 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
559 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000560 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000561}
562
563/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
564
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000566string_slice(register PyStringObject *a, register int i, register int j)
567 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568{
569 if (i < 0)
570 i = 0;
571 if (j < 0)
572 j = 0; /* Avoid signed/unsigned bug in next line */
573 if (j > a->ob_size)
574 j = a->ob_size;
575 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000576 Py_INCREF(a);
577 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000578 }
579 if (j < i)
580 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000581 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000582}
583
Guido van Rossum9284a572000-03-07 15:53:43 +0000584static int
Fred Drakeba096332000-07-09 07:04:36 +0000585string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000586{
587 register char *s, *end;
588 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000589 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000590 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000591 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000592 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000593 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000594 return -1;
595 }
596 c = PyString_AsString(el)[0];
597 s = PyString_AsString(a);
598 end = s + PyString_Size(a);
599 while (s < end) {
600 if (c == *s++)
601 return 1;
602 }
603 return 0;
604}
605
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000606static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000607string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000608{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000609 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000610 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000611 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000612 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613 return NULL;
614 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000615 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000616 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000617 if (v == NULL)
618 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000619 else {
620#ifdef COUNT_ALLOCS
621 one_strings++;
622#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000623 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000624 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000625 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000626}
627
Martin v. Löwiscd353062001-05-24 16:56:35 +0000628static PyObject*
629string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000630{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000631 int c;
632 int len_a, len_b;
633 int min_len;
634 PyObject *result;
635
636 /* One of the objects is a string object. Make sure the
637 other one is one, too. */
638 if (a->ob_type != b->ob_type) {
639 result = Py_NotImplemented;
640 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000641 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000642 if (a == b) {
643 switch (op) {
644 case Py_EQ:case Py_LE:case Py_GE:
645 result = Py_True;
646 goto out;
647 case Py_NE:case Py_LT:case Py_GT:
648 result = Py_False;
649 goto out;
650 }
651 }
652 if (op == Py_EQ) {
653 /* Supporting Py_NE here as well does not save
654 much time, since Py_NE is rarely used. */
655 if (a->ob_size == b->ob_size
656 && (a->ob_sval[0] == b->ob_sval[0]
657 && memcmp(a->ob_sval, b->ob_sval,
658 a->ob_size) == 0)) {
659 result = Py_True;
660 } else {
661 result = Py_False;
662 }
663 goto out;
664 }
665 len_a = a->ob_size; len_b = b->ob_size;
666 min_len = (len_a < len_b) ? len_a : len_b;
667 if (min_len > 0) {
668 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
669 if (c==0)
670 c = memcmp(a->ob_sval, b->ob_sval, min_len);
671 }else
672 c = 0;
673 if (c == 0)
674 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
675 switch (op) {
676 case Py_LT: c = c < 0; break;
677 case Py_LE: c = c <= 0; break;
678 case Py_EQ: assert(0); break; /* unreachable */
679 case Py_NE: c = c != 0; break;
680 case Py_GT: c = c > 0; break;
681 case Py_GE: c = c >= 0; break;
682 default:
683 result = Py_NotImplemented;
684 goto out;
685 }
686 result = c ? Py_True : Py_False;
687 out:
688 Py_INCREF(result);
689 return result;
690}
691
692int
693_PyString_Eq(PyObject *o1, PyObject *o2)
694{
695 PyStringObject *a, *b;
696 a = (PyStringObject*)o1;
697 b = (PyStringObject*)o2;
698 return a->ob_size == b->ob_size
699 && *a->ob_sval == *b->ob_sval
700 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000701}
702
Guido van Rossum9bfef441993-03-29 10:43:31 +0000703static long
Fred Drakeba096332000-07-09 07:04:36 +0000704string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000705{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000706 register int len;
707 register unsigned char *p;
708 register long x;
709
710#ifdef CACHE_HASH
711 if (a->ob_shash != -1)
712 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000713#ifdef INTERN_STRINGS
714 if (a->ob_sinterned != NULL)
715 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000716 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000717#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000718#endif
719 len = a->ob_size;
720 p = (unsigned char *) a->ob_sval;
721 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000722 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000723 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000724 x ^= a->ob_size;
725 if (x == -1)
726 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000727#ifdef CACHE_HASH
728 a->ob_shash = x;
729#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000730 return x;
731}
732
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000733static int
Fred Drakeba096332000-07-09 07:04:36 +0000734string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000735{
736 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000737 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000738 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000739 return -1;
740 }
741 *ptr = (void *)self->ob_sval;
742 return self->ob_size;
743}
744
745static int
Fred Drakeba096332000-07-09 07:04:36 +0000746string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000747{
Guido van Rossum045e6881997-09-08 18:30:11 +0000748 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000749 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000750 return -1;
751}
752
753static int
Fred Drakeba096332000-07-09 07:04:36 +0000754string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000755{
756 if ( lenp )
757 *lenp = self->ob_size;
758 return 1;
759}
760
Guido van Rossum1db70701998-10-08 02:18:52 +0000761static int
Fred Drakeba096332000-07-09 07:04:36 +0000762string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000763{
764 if ( index != 0 ) {
765 PyErr_SetString(PyExc_SystemError,
766 "accessing non-existent string segment");
767 return -1;
768 }
769 *ptr = self->ob_sval;
770 return self->ob_size;
771}
772
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000773static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000774 (inquiry)string_length, /*sq_length*/
775 (binaryfunc)string_concat, /*sq_concat*/
776 (intargfunc)string_repeat, /*sq_repeat*/
777 (intargfunc)string_item, /*sq_item*/
778 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000779 0, /*sq_ass_item*/
780 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000781 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000782};
783
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000784static PyBufferProcs string_as_buffer = {
785 (getreadbufferproc)string_buffer_getreadbuf,
786 (getwritebufferproc)string_buffer_getwritebuf,
787 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000788 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000789};
790
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000791
792
/* Selectors telling do_strip() which end(s) of the string to trim. */
#define LEFTSTRIP	0	/* leading whitespace only */
#define RIGHTSTRIP	1	/* trailing whitespace only */
#define BOTHSTRIP	2	/* both ends */
796
797
798static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000799split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000800{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000801 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000802 PyObject* item;
803 PyObject *list = PyList_New(0);
804
805 if (list == NULL)
806 return NULL;
807
Guido van Rossum4c08d552000-03-10 22:55:18 +0000808 for (i = j = 0; i < len; ) {
809 while (i < len && isspace(Py_CHARMASK(s[i])))
810 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000811 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000812 while (i < len && !isspace(Py_CHARMASK(s[i])))
813 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000814 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000815 if (maxsplit-- <= 0)
816 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000817 item = PyString_FromStringAndSize(s+j, (int)(i-j));
818 if (item == NULL)
819 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000820 err = PyList_Append(list, item);
821 Py_DECREF(item);
822 if (err < 0)
823 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000824 while (i < len && isspace(Py_CHARMASK(s[i])))
825 i++;
826 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000827 }
828 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000829 if (j < len) {
830 item = PyString_FromStringAndSize(s+j, (int)(len - j));
831 if (item == NULL)
832 goto finally;
833 err = PyList_Append(list, item);
834 Py_DECREF(item);
835 if (err < 0)
836 goto finally;
837 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000838 return list;
839 finally:
840 Py_DECREF(list);
841 return NULL;
842}
843
844
static char split__doc__[] =
	"S.split([sep [,maxsplit]]) -> list of strings\n"
	"\n"
	"Return a list of the words in the string S, using sep as the\n"
	"delimiter string. If maxsplit is given, at most maxsplit\n"
	"splits are done. If sep is not specified, any whitespace string\n"
	"is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000852
853static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000854string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000855{
856 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000857 int maxsplit = -1;
858 const char *s = PyString_AS_STRING(self), *sub;
859 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000860
Guido van Rossum4c08d552000-03-10 22:55:18 +0000861 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000862 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000863 if (maxsplit < 0)
864 maxsplit = INT_MAX;
865 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000866 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000867 if (PyString_Check(subobj)) {
868 sub = PyString_AS_STRING(subobj);
869 n = PyString_GET_SIZE(subobj);
870 }
871 else if (PyUnicode_Check(subobj))
872 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
873 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
874 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875 if (n == 0) {
876 PyErr_SetString(PyExc_ValueError, "empty separator");
877 return NULL;
878 }
879
880 list = PyList_New(0);
881 if (list == NULL)
882 return NULL;
883
884 i = j = 0;
885 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000886 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000887 if (maxsplit-- <= 0)
888 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889 item = PyString_FromStringAndSize(s+j, (int)(i-j));
890 if (item == NULL)
891 goto fail;
892 err = PyList_Append(list, item);
893 Py_DECREF(item);
894 if (err < 0)
895 goto fail;
896 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000897 }
898 else
899 i++;
900 }
901 item = PyString_FromStringAndSize(s+j, (int)(len-j));
902 if (item == NULL)
903 goto fail;
904 err = PyList_Append(list, item);
905 Py_DECREF(item);
906 if (err < 0)
907 goto fail;
908
909 return list;
910
911 fail:
912 Py_DECREF(list);
913 return NULL;
914}
915
916
static char join__doc__[] =
	"S.join(sequence) -> string\n"
	"\n"
	"Return a string which is the concatenation of the strings in the\n"
	"sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000922
923static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000924string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000925{
926 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000927 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000928 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000929 char *p;
930 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000931 size_t sz = 0;
932 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000933 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000934
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000935 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000936 return NULL;
937
Tim Peters19fe14e2001-01-19 03:03:47 +0000938 seq = PySequence_Fast(orig, "");
939 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000940 if (PyErr_ExceptionMatches(PyExc_TypeError))
941 PyErr_Format(PyExc_TypeError,
942 "sequence expected, %.80s found",
943 orig->ob_type->tp_name);
944 return NULL;
945 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000946
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000947 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000948 if (seqlen == 0) {
949 Py_DECREF(seq);
950 return PyString_FromString("");
951 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000952 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000953 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000954 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
955 PyErr_Format(PyExc_TypeError,
956 "sequence item 0: expected string,"
957 " %.80s found",
958 item->ob_type->tp_name);
959 Py_DECREF(seq);
960 return NULL;
961 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000962 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000963 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000964 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000965 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000966
Tim Peters19fe14e2001-01-19 03:03:47 +0000967 /* There are at least two things to join. Do a pre-pass to figure out
968 * the total amount of space we'll need (sz), see whether any argument
969 * is absurd, and defer to the Unicode join if appropriate.
970 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000971 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000972 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000973 item = PySequence_Fast_GET_ITEM(seq, i);
974 if (!PyString_Check(item)){
975 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000976 /* Defer to Unicode join.
977 * CAUTION: There's no gurantee that the
978 * original sequence can be iterated over
979 * again, so we must pass seq here.
980 */
981 PyObject *result;
982 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000983 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +0000984 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000985 }
986 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000987 "sequence item %i: expected string,"
988 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000989 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000990 Py_DECREF(seq);
991 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000992 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000993 sz += PyString_GET_SIZE(item);
994 if (i != 0)
995 sz += seplen;
996 if (sz < old_sz || sz > INT_MAX) {
997 PyErr_SetString(PyExc_OverflowError,
998 "join() is too long for a Python string");
999 Py_DECREF(seq);
1000 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001001 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001002 }
1003
1004 /* Allocate result space. */
1005 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1006 if (res == NULL) {
1007 Py_DECREF(seq);
1008 return NULL;
1009 }
1010
1011 /* Catenate everything. */
1012 p = PyString_AS_STRING(res);
1013 for (i = 0; i < seqlen; ++i) {
1014 size_t n;
1015 item = PySequence_Fast_GET_ITEM(seq, i);
1016 n = PyString_GET_SIZE(item);
1017 memcpy(p, PyString_AS_STRING(item), n);
1018 p += n;
1019 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001020 memcpy(p, sep, seplen);
1021 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001022 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001023 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001024
Jeremy Hylton49048292000-07-11 03:28:17 +00001025 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001026 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001027}
1028
Tim Peters52e155e2001-06-16 05:42:57 +00001029PyObject *
1030_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001031{
1032 PyObject* args;
1033 PyObject* result = NULL;
1034
1035 assert(sep != NULL && PyString_Check(sep));
1036 assert(x != NULL);
1037 args = PyTuple_New(1);
1038 if (args != NULL) {
1039 Py_INCREF(x);
1040 PyTuple_SET_ITEM(args, 0, x);
1041 result = string_join((PyStringObject *)sep, args);
1042 Py_DECREF(args);
1043 }
1044 return result;
1045}
1046
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001047static long
Fred Drakeba096332000-07-09 07:04:36 +00001048string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001049{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001050 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001051 int len = PyString_GET_SIZE(self);
1052 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001053 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001055 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001056 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001057 return -2;
1058 if (PyString_Check(subobj)) {
1059 sub = PyString_AS_STRING(subobj);
1060 n = PyString_GET_SIZE(subobj);
1061 }
1062 else if (PyUnicode_Check(subobj))
1063 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
1064 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001065 return -2;
1066
1067 if (last > len)
1068 last = len;
1069 if (last < 0)
1070 last += len;
1071 if (last < 0)
1072 last = 0;
1073 if (i < 0)
1074 i += len;
1075 if (i < 0)
1076 i = 0;
1077
Guido van Rossum4c08d552000-03-10 22:55:18 +00001078 if (dir > 0) {
1079 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001080 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001081 last -= n;
1082 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001083 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001084 return (long)i;
1085 }
1086 else {
1087 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001088
Guido van Rossum4c08d552000-03-10 22:55:18 +00001089 if (n == 0 && i <= last)
1090 return (long)last;
1091 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001092 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001093 return (long)j;
1094 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001095
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001096 return -1;
1097}
1098
1099
/* Docstring for S.find(); the range is written S[start:end], in slice
   notation. */
static char find__doc__[] =
	"S.find(sub [,start [,end]]) -> int\n"
	"\n"
	"Return the lowest index in S where substring sub is found,\n"
	"such that sub is contained within S[start:end]. Optional\n"
	"arguments start and end are interpreted as in slice notation.\n"
	"\n"
	"Return -1 on failure.";
1108
1109static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001110string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001111{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001112 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113 if (result == -2)
1114 return NULL;
1115 return PyInt_FromLong(result);
1116}
1117
1118
static char index__doc__[] =
	"S.index(sub [,start [,end]]) -> int\n"
	"\n"
	"Like S.find() but raise ValueError when the substring is not found.";
1123
1124static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001125string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001127 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001128 if (result == -2)
1129 return NULL;
1130 if (result == -1) {
1131 PyErr_SetString(PyExc_ValueError,
1132 "substring not found in string.index");
1133 return NULL;
1134 }
1135 return PyInt_FromLong(result);
1136}
1137
1138
/* Docstring for S.rfind(); the range is written S[start:end], in slice
   notation. */
static char rfind__doc__[] =
	"S.rfind(sub [,start [,end]]) -> int\n"
	"\n"
	"Return the highest index in S where substring sub is found,\n"
	"such that sub is contained within S[start:end]. Optional\n"
	"arguments start and end are interpreted as in slice notation.\n"
	"\n"
	"Return -1 on failure.";
1147
1148static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001149string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001150{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001151 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152 if (result == -2)
1153 return NULL;
1154 return PyInt_FromLong(result);
1155}
1156
1157
static char rindex__doc__[] =
	"S.rindex(sub [,start [,end]]) -> int\n"
	"\n"
	"Like S.rfind() but raise ValueError when the substring is not found.";
1162
1163static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001164string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001165{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001166 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001167 if (result == -2)
1168 return NULL;
1169 if (result == -1) {
1170 PyErr_SetString(PyExc_ValueError,
1171 "substring not found in string.rindex");
1172 return NULL;
1173 }
1174 return PyInt_FromLong(result);
1175}
1176
1177
1178static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001179do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001180{
1181 char *s = PyString_AS_STRING(self);
1182 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001183
Guido van Rossum43713e52000-02-29 13:59:29 +00001184 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001185 return NULL;
1186
1187 i = 0;
1188 if (striptype != RIGHTSTRIP) {
1189 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1190 i++;
1191 }
1192 }
1193
1194 j = len;
1195 if (striptype != LEFTSTRIP) {
1196 do {
1197 j--;
1198 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1199 j++;
1200 }
1201
1202 if (i == 0 && j == len) {
1203 Py_INCREF(self);
1204 return (PyObject*)self;
1205 }
1206 else
1207 return PyString_FromStringAndSize(s+i, j-i);
1208}
1209
1210
static char strip__doc__[] =
	"S.strip() -> string\n"
	"\n"
	"Return a copy of the string S with leading and trailing\n"
	"whitespace removed.";
1216
1217static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001218string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219{
1220 return do_strip(self, args, BOTHSTRIP);
1221}
1222
1223
static char lstrip__doc__[] =
	"S.lstrip() -> string\n"
	"\n"
	"Return a copy of the string S with leading whitespace removed.";
1228
1229static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001230string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231{
1232 return do_strip(self, args, LEFTSTRIP);
1233}
1234
1235
static char rstrip__doc__[] =
	"S.rstrip() -> string\n"
	"\n"
	"Return a copy of the string S with trailing whitespace removed.";
1240
1241static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001242string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001243{
1244 return do_strip(self, args, RIGHTSTRIP);
1245}
1246
1247
static char lower__doc__[] =
	"S.lower() -> string\n"
	"\n"
	"Return a copy of the string S converted to lowercase.";
1252
1253static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001254string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001255{
1256 char *s = PyString_AS_STRING(self), *s_new;
1257 int i, n = PyString_GET_SIZE(self);
1258 PyObject *new;
1259
Guido van Rossum43713e52000-02-29 13:59:29 +00001260 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001261 return NULL;
1262 new = PyString_FromStringAndSize(NULL, n);
1263 if (new == NULL)
1264 return NULL;
1265 s_new = PyString_AsString(new);
1266 for (i = 0; i < n; i++) {
1267 int c = Py_CHARMASK(*s++);
1268 if (isupper(c)) {
1269 *s_new = tolower(c);
1270 } else
1271 *s_new = c;
1272 s_new++;
1273 }
1274 return new;
1275}
1276
1277
static char upper__doc__[] =
	"S.upper() -> string\n"
	"\n"
	"Return a copy of the string S converted to uppercase.";
1282
1283static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001284string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001285{
1286 char *s = PyString_AS_STRING(self), *s_new;
1287 int i, n = PyString_GET_SIZE(self);
1288 PyObject *new;
1289
Guido van Rossum43713e52000-02-29 13:59:29 +00001290 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001291 return NULL;
1292 new = PyString_FromStringAndSize(NULL, n);
1293 if (new == NULL)
1294 return NULL;
1295 s_new = PyString_AsString(new);
1296 for (i = 0; i < n; i++) {
1297 int c = Py_CHARMASK(*s++);
1298 if (islower(c)) {
1299 *s_new = toupper(c);
1300 } else
1301 *s_new = c;
1302 s_new++;
1303 }
1304 return new;
1305}
1306
1307
static char title__doc__[] =
	"S.title() -> string\n"
	"\n"
	"Return a titlecased version of S, i.e. words start with uppercase\n"
	"characters, all remaining cased characters have lowercase.";
1313
1314static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001315string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001316{
1317 char *s = PyString_AS_STRING(self), *s_new;
1318 int i, n = PyString_GET_SIZE(self);
1319 int previous_is_cased = 0;
1320 PyObject *new;
1321
1322 if (!PyArg_ParseTuple(args, ":title"))
1323 return NULL;
1324 new = PyString_FromStringAndSize(NULL, n);
1325 if (new == NULL)
1326 return NULL;
1327 s_new = PyString_AsString(new);
1328 for (i = 0; i < n; i++) {
1329 int c = Py_CHARMASK(*s++);
1330 if (islower(c)) {
1331 if (!previous_is_cased)
1332 c = toupper(c);
1333 previous_is_cased = 1;
1334 } else if (isupper(c)) {
1335 if (previous_is_cased)
1336 c = tolower(c);
1337 previous_is_cased = 1;
1338 } else
1339 previous_is_cased = 0;
1340 *s_new++ = c;
1341 }
1342 return new;
1343}
1344
static char capitalize__doc__[] =
	"S.capitalize() -> string\n"
	"\n"
	"Return a copy of the string S with only its first character\n"
	"capitalized.";
1350
1351static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001352string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001353{
1354 char *s = PyString_AS_STRING(self), *s_new;
1355 int i, n = PyString_GET_SIZE(self);
1356 PyObject *new;
1357
Guido van Rossum43713e52000-02-29 13:59:29 +00001358 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001359 return NULL;
1360 new = PyString_FromStringAndSize(NULL, n);
1361 if (new == NULL)
1362 return NULL;
1363 s_new = PyString_AsString(new);
1364 if (0 < n) {
1365 int c = Py_CHARMASK(*s++);
1366 if (islower(c))
1367 *s_new = toupper(c);
1368 else
1369 *s_new = c;
1370 s_new++;
1371 }
1372 for (i = 1; i < n; i++) {
1373 int c = Py_CHARMASK(*s++);
1374 if (isupper(c))
1375 *s_new = tolower(c);
1376 else
1377 *s_new = c;
1378 s_new++;
1379 }
1380 return new;
1381}
1382
1383
static char count__doc__[] =
	"S.count(sub[, start[, end]]) -> int\n"
	"\n"
	"Return the number of occurrences of substring sub in string\n"
	"S[start:end]. Optional arguments start and end are\n"
	"interpreted as in slice notation.";
1390
1391static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001392string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001393{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001394 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001395 int len = PyString_GET_SIZE(self), n;
1396 int i = 0, last = INT_MAX;
1397 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001398 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001399
Guido van Rossumc6821402000-05-08 14:08:05 +00001400 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1401 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001402 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001403
Guido van Rossum4c08d552000-03-10 22:55:18 +00001404 if (PyString_Check(subobj)) {
1405 sub = PyString_AS_STRING(subobj);
1406 n = PyString_GET_SIZE(subobj);
1407 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001408 else if (PyUnicode_Check(subobj)) {
1409 int count;
1410 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1411 if (count == -1)
1412 return NULL;
1413 else
1414 return PyInt_FromLong((long) count);
1415 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001416 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1417 return NULL;
1418
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419 if (last > len)
1420 last = len;
1421 if (last < 0)
1422 last += len;
1423 if (last < 0)
1424 last = 0;
1425 if (i < 0)
1426 i += len;
1427 if (i < 0)
1428 i = 0;
1429 m = last + 1 - n;
1430 if (n == 0)
1431 return PyInt_FromLong((long) (m-i));
1432
1433 r = 0;
1434 while (i < m) {
1435 if (!memcmp(s+i, sub, n)) {
1436 r++;
1437 i += n;
1438 } else {
1439 i++;
1440 }
1441 }
1442 return PyInt_FromLong((long) r);
1443}
1444
1445
static char swapcase__doc__[] =
	"S.swapcase() -> string\n"
	"\n"
	"Return a copy of the string S with uppercase characters\n"
	"converted to lowercase and vice versa.";
1451
1452static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001453string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001454{
1455 char *s = PyString_AS_STRING(self), *s_new;
1456 int i, n = PyString_GET_SIZE(self);
1457 PyObject *new;
1458
Guido van Rossum43713e52000-02-29 13:59:29 +00001459 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001460 return NULL;
1461 new = PyString_FromStringAndSize(NULL, n);
1462 if (new == NULL)
1463 return NULL;
1464 s_new = PyString_AsString(new);
1465 for (i = 0; i < n; i++) {
1466 int c = Py_CHARMASK(*s++);
1467 if (islower(c)) {
1468 *s_new = toupper(c);
1469 }
1470 else if (isupper(c)) {
1471 *s_new = tolower(c);
1472 }
1473 else
1474 *s_new = c;
1475 s_new++;
1476 }
1477 return new;
1478}
1479
1480
static char translate__doc__[] =
	"S.translate(table [,deletechars]) -> string\n"
	"\n"
	"Return a copy of the string S, where all characters occurring\n"
	"in the optional argument deletechars are removed, and the\n"
	"remaining characters have been mapped through the given\n"
	"translation table, which must be a string of length 256.";
1488
1489static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001490string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492 register char *input, *output;
1493 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001494 register int i, c, changed = 0;
1495 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001496 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001497 int inlen, tablen, dellen = 0;
1498 PyObject *result;
1499 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001500 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001501
Guido van Rossum4c08d552000-03-10 22:55:18 +00001502 if (!PyArg_ParseTuple(args, "O|O:translate",
1503 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001504 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001505
1506 if (PyString_Check(tableobj)) {
1507 table1 = PyString_AS_STRING(tableobj);
1508 tablen = PyString_GET_SIZE(tableobj);
1509 }
1510 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001511 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001512 parameter; instead a mapping to None will cause characters
1513 to be deleted. */
1514 if (delobj != NULL) {
1515 PyErr_SetString(PyExc_TypeError,
1516 "deletions are implemented differently for unicode");
1517 return NULL;
1518 }
1519 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1520 }
1521 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001522 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001523
1524 if (delobj != NULL) {
1525 if (PyString_Check(delobj)) {
1526 del_table = PyString_AS_STRING(delobj);
1527 dellen = PyString_GET_SIZE(delobj);
1528 }
1529 else if (PyUnicode_Check(delobj)) {
1530 PyErr_SetString(PyExc_TypeError,
1531 "deletions are implemented differently for unicode");
1532 return NULL;
1533 }
1534 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1535 return NULL;
1536
1537 if (tablen != 256) {
1538 PyErr_SetString(PyExc_ValueError,
1539 "translation table must be 256 characters long");
1540 return NULL;
1541 }
1542 }
1543 else {
1544 del_table = NULL;
1545 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001546 }
1547
1548 table = table1;
1549 inlen = PyString_Size(input_obj);
1550 result = PyString_FromStringAndSize((char *)NULL, inlen);
1551 if (result == NULL)
1552 return NULL;
1553 output_start = output = PyString_AsString(result);
1554 input = PyString_AsString(input_obj);
1555
1556 if (dellen == 0) {
1557 /* If no deletions are required, use faster code */
1558 for (i = inlen; --i >= 0; ) {
1559 c = Py_CHARMASK(*input++);
1560 if (Py_CHARMASK((*output++ = table[c])) != c)
1561 changed = 1;
1562 }
1563 if (changed)
1564 return result;
1565 Py_DECREF(result);
1566 Py_INCREF(input_obj);
1567 return input_obj;
1568 }
1569
1570 for (i = 0; i < 256; i++)
1571 trans_table[i] = Py_CHARMASK(table[i]);
1572
1573 for (i = 0; i < dellen; i++)
1574 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1575
1576 for (i = inlen; --i >= 0; ) {
1577 c = Py_CHARMASK(*input++);
1578 if (trans_table[c] != -1)
1579 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1580 continue;
1581 changed = 1;
1582 }
1583 if (!changed) {
1584 Py_DECREF(result);
1585 Py_INCREF(input_obj);
1586 return input_obj;
1587 }
1588 /* Fix the size of the resulting string */
1589 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1590 return NULL;
1591 return result;
1592}
1593
1594
1595/* What follows is used for implementing replace(). Perry Stoll. */
1596
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of the match within MEM, or -1 if
  there is none.  A pattern longer than MEM can never match, so -1 is
  returned in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot begin within the final pat_len-1 bytes. */
	const int last_start = len - pat_len;
	int pos = 0;

	while (pos <= last_start) {
		/* Cheap first-byte test before the full comparison. */
		if (mem[pos] == pat[0] &&
		    memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
		pos++;
	}
	return -1;
}
1622
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM,
  counted from left to right.  For example mem=1111 with pat=11 gives 2,
  and mem=11111 with pat=11 gives 2 as well.
*/
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int hits = 0;
	int at;

	while (len >= 0 &&
	       (at = mymemfind(mem, len, pat, pat_len)) != -1) {
		/* Resume the search just past this match. */
		const int consumed = at + pat_len;

		mem += consumed;
		len -= consumed;
		hits++;
	}
	return hits;
}
1646
/*
  mymemreplace

  Return a string in which occurrences of PAT in memory STR are
  replaced with SUB.  At most COUNT occurrences are replaced, scanning
  left to right; a negative COUNT means "replace all of them".

  If STR is empty, PAT is longer than STR, or nothing would be replaced
  (no occurrences of PAT in STR, or COUNT is 0), then the original
  string is returned.  Otherwise, a new string is allocated here with
  PyMem_MALLOC and returned; the caller is responsible for freeing it.
  The new buffer holds exactly out_len bytes and is not NUL-terminated
  (except for the 1-byte buffer handed back for an empty result).

  on return, out_len is:
      the length of output string, or
      -1 if the input string is returned, or
      unchanged if an error occurs.

  return value is:
      the new string allocated locally, or
      NULL if an error occurred (no memory, or the length of the
      result would not fit in an int).
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len, delta;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growth per replacement.  Only a positive delta can push the
       result past INT_MAX; refuse instead of letting the signed
       arithmetic overflow and under-allocate the output buffer. */
    delta = sub_len - pat_len;
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;

    new_len = len + nfound * delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1731
1732
1733static char replace__doc__[] =
1734"S.replace (old, new[, maxsplit]) -> string\n\
1735\n\
1736Return a copy of string S with all occurrences of substring\n\
1737old replaced by new. If the optional argument maxsplit is\n\
1738given, only the first maxsplit occurrences are replaced.";
1739
1740static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001741string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001743 const char *str = PyString_AS_STRING(self), *sub, *repl;
1744 char *new_s;
1745 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1746 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749
Guido van Rossum4c08d552000-03-10 22:55:18 +00001750 if (!PyArg_ParseTuple(args, "OO|i:replace",
1751 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753
1754 if (PyString_Check(subobj)) {
1755 sub = PyString_AS_STRING(subobj);
1756 sub_len = PyString_GET_SIZE(subobj);
1757 }
1758 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001759 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 subobj, replobj, count);
1761 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1762 return NULL;
1763
1764 if (PyString_Check(replobj)) {
1765 repl = PyString_AS_STRING(replobj);
1766 repl_len = PyString_GET_SIZE(replobj);
1767 }
1768 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001769 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001770 subobj, replobj, count);
1771 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1772 return NULL;
1773
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001774 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001775 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776 return NULL;
1777 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001778 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001779 if (new_s == NULL) {
1780 PyErr_NoMemory();
1781 return NULL;
1782 }
1783 if (out_len == -1) {
1784 /* we're returning another reference to self */
1785 new = (PyObject*)self;
1786 Py_INCREF(new);
1787 }
1788 else {
1789 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001790 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001791 }
1792 return new;
1793}
1794
1795
1796static char startswith__doc__[] =
1797"S.startswith(prefix[, start[, end]]) -> int\n\
1798\n\
1799Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1800optional start, test S beginning at that position. With optional end, stop\n\
1801comparing S at that position.";
1802
1803static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001804string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001805{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001806 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001807 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001808 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001809 int plen;
1810 int start = 0;
1811 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001812 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001813
Guido van Rossumc6821402000-05-08 14:08:05 +00001814 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1815 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001816 return NULL;
1817 if (PyString_Check(subobj)) {
1818 prefix = PyString_AS_STRING(subobj);
1819 plen = PyString_GET_SIZE(subobj);
1820 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001821 else if (PyUnicode_Check(subobj)) {
1822 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001823 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001824 subobj, start, end, -1);
1825 if (rc == -1)
1826 return NULL;
1827 else
1828 return PyInt_FromLong((long) rc);
1829 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001830 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001831 return NULL;
1832
1833 /* adopt Java semantics for index out of range. it is legal for
1834 * offset to be == plen, but this only returns true if prefix is
1835 * the empty string.
1836 */
1837 if (start < 0 || start+plen > len)
1838 return PyInt_FromLong(0);
1839
1840 if (!memcmp(str+start, prefix, plen)) {
1841 /* did the match end after the specified end? */
1842 if (end < 0)
1843 return PyInt_FromLong(1);
1844 else if (end - start < plen)
1845 return PyInt_FromLong(0);
1846 else
1847 return PyInt_FromLong(1);
1848 }
1849 else return PyInt_FromLong(0);
1850}
1851
1852
1853static char endswith__doc__[] =
1854"S.endswith(suffix[, start[, end]]) -> int\n\
1855\n\
1856Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1857optional start, test S beginning at that position. With optional end, stop\n\
1858comparing S at that position.";
1859
1860static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001861string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001862{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001863 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001864 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865 const char* suffix;
1866 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867 int start = 0;
1868 int end = -1;
1869 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001871
Guido van Rossumc6821402000-05-08 14:08:05 +00001872 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1873 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001874 return NULL;
1875 if (PyString_Check(subobj)) {
1876 suffix = PyString_AS_STRING(subobj);
1877 slen = PyString_GET_SIZE(subobj);
1878 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001879 else if (PyUnicode_Check(subobj)) {
1880 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001881 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001882 subobj, start, end, +1);
1883 if (rc == -1)
1884 return NULL;
1885 else
1886 return PyInt_FromLong((long) rc);
1887 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001888 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001889 return NULL;
1890
Guido van Rossum4c08d552000-03-10 22:55:18 +00001891 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001892 return PyInt_FromLong(0);
1893
1894 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001895 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001896
Guido van Rossum4c08d552000-03-10 22:55:18 +00001897 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001898 return PyInt_FromLong(1);
1899 else return PyInt_FromLong(0);
1900}
1901
1902
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001903static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001904"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001905\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001906Encodes S using the codec registered for encoding. encoding defaults\n\
1907to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001908handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1909a ValueError. Other possible values are 'ignore' and 'replace'.";
1910
1911static PyObject *
1912string_encode(PyStringObject *self, PyObject *args)
1913{
1914 char *encoding = NULL;
1915 char *errors = NULL;
1916 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1917 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001918 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
1919}
1920
1921
1922static char decode__doc__[] =
1923"S.decode([encoding[,errors]]) -> object\n\
1924\n\
1925Decodes S using the codec registered for encoding. encoding defaults\n\
1926to the default encoding. errors may be given to set a different error\n\
1927handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1928a ValueError. Other possible values are 'ignore' and 'replace'.";
1929
1930static PyObject *
1931string_decode(PyStringObject *self, PyObject *args)
1932{
1933 char *encoding = NULL;
1934 char *errors = NULL;
1935 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
1936 return NULL;
1937 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001938}
1939
1940
Guido van Rossum4c08d552000-03-10 22:55:18 +00001941static char expandtabs__doc__[] =
1942"S.expandtabs([tabsize]) -> string\n\
1943\n\
1944Return a copy of S where all tab characters are expanded using spaces.\n\
1945If tabsize is not given, a tab size of 8 characters is assumed.";
1946
1947static PyObject*
1948string_expandtabs(PyStringObject *self, PyObject *args)
1949{
1950 const char *e, *p;
1951 char *q;
1952 int i, j;
1953 PyObject *u;
1954 int tabsize = 8;
1955
1956 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1957 return NULL;
1958
Thomas Wouters7e474022000-07-16 12:04:32 +00001959 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001960 i = j = 0;
1961 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1962 for (p = PyString_AS_STRING(self); p < e; p++)
1963 if (*p == '\t') {
1964 if (tabsize > 0)
1965 j += tabsize - (j % tabsize);
1966 }
1967 else {
1968 j++;
1969 if (*p == '\n' || *p == '\r') {
1970 i += j;
1971 j = 0;
1972 }
1973 }
1974
1975 /* Second pass: create output string and fill it */
1976 u = PyString_FromStringAndSize(NULL, i + j);
1977 if (!u)
1978 return NULL;
1979
1980 j = 0;
1981 q = PyString_AS_STRING(u);
1982
1983 for (p = PyString_AS_STRING(self); p < e; p++)
1984 if (*p == '\t') {
1985 if (tabsize > 0) {
1986 i = tabsize - (j % tabsize);
1987 j += i;
1988 while (i--)
1989 *q++ = ' ';
1990 }
1991 }
1992 else {
1993 j++;
1994 *q++ = *p;
1995 if (*p == '\n' || *p == '\r')
1996 j = 0;
1997 }
1998
1999 return u;
2000}
2001
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002002static
2003PyObject *pad(PyStringObject *self,
2004 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002005 int right,
2006 char fill)
2007{
2008 PyObject *u;
2009
2010 if (left < 0)
2011 left = 0;
2012 if (right < 0)
2013 right = 0;
2014
2015 if (left == 0 && right == 0) {
2016 Py_INCREF(self);
2017 return (PyObject *)self;
2018 }
2019
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002020 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002021 left + PyString_GET_SIZE(self) + right);
2022 if (u) {
2023 if (left)
2024 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002025 memcpy(PyString_AS_STRING(u) + left,
2026 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002027 PyString_GET_SIZE(self));
2028 if (right)
2029 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2030 fill, right);
2031 }
2032
2033 return u;
2034}
2035
2036static char ljust__doc__[] =
2037"S.ljust(width) -> string\n\
2038\n\
2039Return S left justified in a string of length width. Padding is\n\
2040done using spaces.";
2041
2042static PyObject *
2043string_ljust(PyStringObject *self, PyObject *args)
2044{
2045 int width;
2046 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2047 return NULL;
2048
2049 if (PyString_GET_SIZE(self) >= width) {
2050 Py_INCREF(self);
2051 return (PyObject*) self;
2052 }
2053
2054 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2055}
2056
2057
2058static char rjust__doc__[] =
2059"S.rjust(width) -> string\n\
2060\n\
2061Return S right justified in a string of length width. Padding is\n\
2062done using spaces.";
2063
2064static PyObject *
2065string_rjust(PyStringObject *self, PyObject *args)
2066{
2067 int width;
2068 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2069 return NULL;
2070
2071 if (PyString_GET_SIZE(self) >= width) {
2072 Py_INCREF(self);
2073 return (PyObject*) self;
2074 }
2075
2076 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2077}
2078
2079
2080static char center__doc__[] =
2081"S.center(width) -> string\n\
2082\n\
2083Return S centered in a string of length width. Padding is done\n\
2084using spaces.";
2085
2086static PyObject *
2087string_center(PyStringObject *self, PyObject *args)
2088{
2089 int marg, left;
2090 int width;
2091
2092 if (!PyArg_ParseTuple(args, "i:center", &width))
2093 return NULL;
2094
2095 if (PyString_GET_SIZE(self) >= width) {
2096 Py_INCREF(self);
2097 return (PyObject*) self;
2098 }
2099
2100 marg = width - PyString_GET_SIZE(self);
2101 left = marg / 2 + (marg & width & 1);
2102
2103 return pad(self, left, marg - left, ' ');
2104}
2105
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width) (currently compiled out): left-pads
   with '0' up to WIDTH and, when the original string began with '+' or
   '-', moves that sign in front of the zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
    int width;
    int zeros;
    char *chars;
    PyObject *padded;

    if (!PyArg_ParseTuple(args, "i:zfill", &width))
        return NULL;

    if (width <= PyString_GET_SIZE(self)) {
        Py_INCREF(self);
        return (PyObject*) self;
    }

    zeros = width - PyString_GET_SIZE(self);

    padded = pad(self, zeros, 0, '0');
    if (padded == NULL)
        return NULL;

    /* chars[zeros] is where the original first character landed */
    chars = PyString_AS_STRING(padded);
    if (chars[zeros] == '+' || chars[zeros] == '-') {
        /* move sign to beginning of string */
        chars[0] = chars[zeros];
        chars[zeros] = '0';
    }

    return padded;
}
#endif
2145
2146static char isspace__doc__[] =
2147"S.isspace() -> int\n\
2148\n\
2149Return 1 if there are only whitespace characters in S,\n\
21500 otherwise.";
2151
2152static PyObject*
2153string_isspace(PyStringObject *self, PyObject *args)
2154{
Fred Drakeba096332000-07-09 07:04:36 +00002155 register const unsigned char *p
2156 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002157 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158
2159 if (!PyArg_NoArgs(args))
2160 return NULL;
2161
2162 /* Shortcut for single character strings */
2163 if (PyString_GET_SIZE(self) == 1 &&
2164 isspace(*p))
2165 return PyInt_FromLong(1);
2166
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002167 /* Special case for empty strings */
2168 if (PyString_GET_SIZE(self) == 0)
2169 return PyInt_FromLong(0);
2170
Guido van Rossum4c08d552000-03-10 22:55:18 +00002171 e = p + PyString_GET_SIZE(self);
2172 for (; p < e; p++) {
2173 if (!isspace(*p))
2174 return PyInt_FromLong(0);
2175 }
2176 return PyInt_FromLong(1);
2177}
2178
2179
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002180static char isalpha__doc__[] =
2181"S.isalpha() -> int\n\
2182\n\
2183Return 1 if all characters in S are alphabetic\n\
2184and there is at least one character in S, 0 otherwise.";
2185
2186static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002187string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002188{
Fred Drakeba096332000-07-09 07:04:36 +00002189 register const unsigned char *p
2190 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002191 register const unsigned char *e;
2192
2193 if (!PyArg_NoArgs(args))
2194 return NULL;
2195
2196 /* Shortcut for single character strings */
2197 if (PyString_GET_SIZE(self) == 1 &&
2198 isalpha(*p))
2199 return PyInt_FromLong(1);
2200
2201 /* Special case for empty strings */
2202 if (PyString_GET_SIZE(self) == 0)
2203 return PyInt_FromLong(0);
2204
2205 e = p + PyString_GET_SIZE(self);
2206 for (; p < e; p++) {
2207 if (!isalpha(*p))
2208 return PyInt_FromLong(0);
2209 }
2210 return PyInt_FromLong(1);
2211}
2212
2213
2214static char isalnum__doc__[] =
2215"S.isalnum() -> int\n\
2216\n\
2217Return 1 if all characters in S are alphanumeric\n\
2218and there is at least one character in S, 0 otherwise.";
2219
2220static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002221string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002222{
Fred Drakeba096332000-07-09 07:04:36 +00002223 register const unsigned char *p
2224 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002225 register const unsigned char *e;
2226
2227 if (!PyArg_NoArgs(args))
2228 return NULL;
2229
2230 /* Shortcut for single character strings */
2231 if (PyString_GET_SIZE(self) == 1 &&
2232 isalnum(*p))
2233 return PyInt_FromLong(1);
2234
2235 /* Special case for empty strings */
2236 if (PyString_GET_SIZE(self) == 0)
2237 return PyInt_FromLong(0);
2238
2239 e = p + PyString_GET_SIZE(self);
2240 for (; p < e; p++) {
2241 if (!isalnum(*p))
2242 return PyInt_FromLong(0);
2243 }
2244 return PyInt_FromLong(1);
2245}
2246
2247
Guido van Rossum4c08d552000-03-10 22:55:18 +00002248static char isdigit__doc__[] =
2249"S.isdigit() -> int\n\
2250\n\
2251Return 1 if there are only digit characters in S,\n\
22520 otherwise.";
2253
2254static PyObject*
2255string_isdigit(PyStringObject *self, PyObject *args)
2256{
Fred Drakeba096332000-07-09 07:04:36 +00002257 register const unsigned char *p
2258 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002259 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002260
2261 if (!PyArg_NoArgs(args))
2262 return NULL;
2263
2264 /* Shortcut for single character strings */
2265 if (PyString_GET_SIZE(self) == 1 &&
2266 isdigit(*p))
2267 return PyInt_FromLong(1);
2268
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002269 /* Special case for empty strings */
2270 if (PyString_GET_SIZE(self) == 0)
2271 return PyInt_FromLong(0);
2272
Guido van Rossum4c08d552000-03-10 22:55:18 +00002273 e = p + PyString_GET_SIZE(self);
2274 for (; p < e; p++) {
2275 if (!isdigit(*p))
2276 return PyInt_FromLong(0);
2277 }
2278 return PyInt_FromLong(1);
2279}
2280
2281
2282static char islower__doc__[] =
2283"S.islower() -> int\n\
2284\n\
2285Return 1 if all cased characters in S are lowercase and there is\n\
2286at least one cased character in S, 0 otherwise.";
2287
2288static PyObject*
2289string_islower(PyStringObject *self, PyObject *args)
2290{
Fred Drakeba096332000-07-09 07:04:36 +00002291 register const unsigned char *p
2292 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002293 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294 int cased;
2295
2296 if (!PyArg_NoArgs(args))
2297 return NULL;
2298
2299 /* Shortcut for single character strings */
2300 if (PyString_GET_SIZE(self) == 1)
2301 return PyInt_FromLong(islower(*p) != 0);
2302
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002303 /* Special case for empty strings */
2304 if (PyString_GET_SIZE(self) == 0)
2305 return PyInt_FromLong(0);
2306
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 e = p + PyString_GET_SIZE(self);
2308 cased = 0;
2309 for (; p < e; p++) {
2310 if (isupper(*p))
2311 return PyInt_FromLong(0);
2312 else if (!cased && islower(*p))
2313 cased = 1;
2314 }
2315 return PyInt_FromLong(cased);
2316}
2317
2318
2319static char isupper__doc__[] =
2320"S.isupper() -> int\n\
2321\n\
2322Return 1 if all cased characters in S are uppercase and there is\n\
2323at least one cased character in S, 0 otherwise.";
2324
2325static PyObject*
2326string_isupper(PyStringObject *self, PyObject *args)
2327{
Fred Drakeba096332000-07-09 07:04:36 +00002328 register const unsigned char *p
2329 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002330 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331 int cased;
2332
2333 if (!PyArg_NoArgs(args))
2334 return NULL;
2335
2336 /* Shortcut for single character strings */
2337 if (PyString_GET_SIZE(self) == 1)
2338 return PyInt_FromLong(isupper(*p) != 0);
2339
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002340 /* Special case for empty strings */
2341 if (PyString_GET_SIZE(self) == 0)
2342 return PyInt_FromLong(0);
2343
Guido van Rossum4c08d552000-03-10 22:55:18 +00002344 e = p + PyString_GET_SIZE(self);
2345 cased = 0;
2346 for (; p < e; p++) {
2347 if (islower(*p))
2348 return PyInt_FromLong(0);
2349 else if (!cased && isupper(*p))
2350 cased = 1;
2351 }
2352 return PyInt_FromLong(cased);
2353}
2354
2355
2356static char istitle__doc__[] =
2357"S.istitle() -> int\n\
2358\n\
2359Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2360may only follow uncased characters and lowercase characters only cased\n\
2361ones. Return 0 otherwise.";
2362
2363static PyObject*
2364string_istitle(PyStringObject *self, PyObject *args)
2365{
Fred Drakeba096332000-07-09 07:04:36 +00002366 register const unsigned char *p
2367 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002368 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 int cased, previous_is_cased;
2370
2371 if (!PyArg_NoArgs(args))
2372 return NULL;
2373
2374 /* Shortcut for single character strings */
2375 if (PyString_GET_SIZE(self) == 1)
2376 return PyInt_FromLong(isupper(*p) != 0);
2377
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002378 /* Special case for empty strings */
2379 if (PyString_GET_SIZE(self) == 0)
2380 return PyInt_FromLong(0);
2381
Guido van Rossum4c08d552000-03-10 22:55:18 +00002382 e = p + PyString_GET_SIZE(self);
2383 cased = 0;
2384 previous_is_cased = 0;
2385 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002386 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387
2388 if (isupper(ch)) {
2389 if (previous_is_cased)
2390 return PyInt_FromLong(0);
2391 previous_is_cased = 1;
2392 cased = 1;
2393 }
2394 else if (islower(ch)) {
2395 if (!previous_is_cased)
2396 return PyInt_FromLong(0);
2397 previous_is_cased = 1;
2398 cased = 1;
2399 }
2400 else
2401 previous_is_cased = 0;
2402 }
2403 return PyInt_FromLong(cased);
2404}
2405
2406
2407static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002408"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002409\n\
2410Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002411Line breaks are not included in the resulting list unless keepends\n\
2412is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002413
2414#define SPLIT_APPEND(data, left, right) \
2415 str = PyString_FromStringAndSize(data + left, right - left); \
2416 if (!str) \
2417 goto onError; \
2418 if (PyList_Append(list, str)) { \
2419 Py_DECREF(str); \
2420 goto onError; \
2421 } \
2422 else \
2423 Py_DECREF(str);
2424
2425static PyObject*
2426string_splitlines(PyStringObject *self, PyObject *args)
2427{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002428 register int i;
2429 register int j;
2430 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002431 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002432 PyObject *list;
2433 PyObject *str;
2434 char *data;
2435
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002436 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002437 return NULL;
2438
2439 data = PyString_AS_STRING(self);
2440 len = PyString_GET_SIZE(self);
2441
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 list = PyList_New(0);
2443 if (!list)
2444 goto onError;
2445
2446 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002447 int eol;
2448
Guido van Rossum4c08d552000-03-10 22:55:18 +00002449 /* Find a line and append it */
2450 while (i < len && data[i] != '\n' && data[i] != '\r')
2451 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002452
2453 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002454 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002455 if (i < len) {
2456 if (data[i] == '\r' && i + 1 < len &&
2457 data[i+1] == '\n')
2458 i += 2;
2459 else
2460 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002461 if (keepends)
2462 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002463 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002464 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002465 j = i;
2466 }
2467 if (j < len) {
2468 SPLIT_APPEND(data, j, len);
2469 }
2470
2471 return list;
2472
2473 onError:
2474 Py_DECREF(list);
2475 return NULL;
2476}
2477
2478#undef SPLIT_APPEND
2479
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002480
/* Method table for string objects; it is installed in the tp_methods
   slot of PyString_Type.  Each entry is
       {Python-level name, C implementation, flags, docstring}.
   The third field selects how arguments reach the C function: the
   entries marked 0 are the is*() predicates, whose implementations
   check their arguments with PyArg_NoArgs(); the flag-1 implementations
   visible near this table parse an argument tuple with
   PyArg_ParseTuple().  NOTE(review): the remaining flag-1 functions are
   defined elsewhere in the file -- presumably they follow the same
   convention; confirm before changing a flag. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, 1, join__doc__},
    {"split", (PyCFunction)string_split, 1, split__doc__},
    {"lower", (PyCFunction)string_lower, 1, lower__doc__},
    {"upper", (PyCFunction)string_upper, 1, upper__doc__},
    {"islower", (PyCFunction)string_islower, 0, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
    {"count", (PyCFunction)string_count, 1, count__doc__},
    {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
    {"find", (PyCFunction)string_find, 1, find__doc__},
    {"index", (PyCFunction)string_index, 1, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, 1, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
    {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
    {"strip", (PyCFunction)string_strip, 1, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
    {"translate", (PyCFunction)string_translate, 1, translate__doc__},
    {"title", (PyCFunction)string_title, 1, title__doc__},
    {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
    {"center", (PyCFunction)string_center, 1, center__doc__},
    {"encode", (PyCFunction)string_encode, 1, encode__doc__},
    {"decode", (PyCFunction)string_decode, 1, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
    /* zfill is compiled out here, matching its #if 0'd implementation */
#if 0
    {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
#endif
    {NULL, NULL} /* sentinel */
};
2523
2524static PyObject *
Tim Peters6d6c1a32001-08-02 04:15:00 +00002525string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526{
Tim Peters6d6c1a32001-08-02 04:15:00 +00002527 PyObject *x = NULL;
2528 static char *kwlist[] = {"object", 0};
2529
2530 assert(type == &PyString_Type);
2531 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
2532 return NULL;
2533 if (x == NULL)
2534 return PyString_FromString("");
2535 return PyObject_Str(x);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002536}
2537
/* Docstring of the str type (installed as tp_doc of PyString_Type). */
static char string_doc[] =
	"str(object) -> string\n"
	"\n"
	"Return a nice string representation of the object.\n"
	"If the argument is a string, the return value is the same object.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002543
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002544PyTypeObject PyString_Type = {
2545 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002546 0,
Tim Peters6d6c1a32001-08-02 04:15:00 +00002547 "str",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002548 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002549 sizeof(char),
Tim Peters6d6c1a32001-08-02 04:15:00 +00002550 (destructor)string_dealloc, /* tp_dealloc */
2551 (printfunc)string_print, /* tp_print */
2552 0, /* tp_getattr */
2553 0, /* tp_setattr */
2554 0, /* tp_compare */
2555 (reprfunc)string_repr, /* tp_repr */
2556 0, /* tp_as_number */
2557 &string_as_sequence, /* tp_as_sequence */
2558 0, /* tp_as_mapping */
2559 (hashfunc)string_hash, /* tp_hash */
2560 0, /* tp_call */
2561 (reprfunc)string_str, /* tp_str */
2562 PyObject_GenericGetAttr, /* tp_getattro */
2563 0, /* tp_setattro */
2564 &string_as_buffer, /* tp_as_buffer */
2565 Py_TPFLAGS_DEFAULT, /* tp_flags */
2566 string_doc, /* tp_doc */
2567 0, /* tp_traverse */
2568 0, /* tp_clear */
2569 (richcmpfunc)string_richcompare, /* tp_richcompare */
2570 0, /* tp_weaklistoffset */
2571 0, /* tp_iter */
2572 0, /* tp_iternext */
2573 string_methods, /* tp_methods */
2574 0, /* tp_members */
2575 0, /* tp_getset */
2576 0, /* tp_base */
2577 0, /* tp_dict */
2578 0, /* tp_descr_get */
2579 0, /* tp_descr_set */
2580 0, /* tp_dictoffset */
2581 0, /* tp_init */
2582 0, /* tp_alloc */
2583 string_new, /* tp_new */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002584};
2585
2586void
Fred Drakeba096332000-07-09 07:04:36 +00002587PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002588{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002589 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002590 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002591 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002592 if (w == NULL || !PyString_Check(*pv)) {
2593 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002594 *pv = NULL;
2595 return;
2596 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002597 v = string_concat((PyStringObject *) *pv, w);
2598 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002599 *pv = v;
2600}
2601
Guido van Rossum013142a1994-08-30 08:19:36 +00002602void
Fred Drakeba096332000-07-09 07:04:36 +00002603PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002604{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002605 PyString_Concat(pv, w);
2606 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002607}
2608
2609
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002610/* The following function breaks the notion that strings are immutable:
2611 it changes the size of a string. We get away with this only if there
2612 is only one module referencing the object. You can also think of it
2613 as creating a new string object and destroying the old one, only
2614 more efficiently. In any case, don't use this if the string may
2615 already be known to some other part of the code... */
2616
2617int
Fred Drakeba096332000-07-09 07:04:36 +00002618_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002619{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002620 register PyObject *v;
2621 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002622 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002623 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002624 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002625 Py_DECREF(v);
2626 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002627 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002628 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002629 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002630#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002631 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002632#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002633 _Py_ForgetReference(v);
2634 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002635 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002636 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002637 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002638 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002639 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002640 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002641 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002642 _Py_NewReference(*pv);
2643 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002644 sv->ob_size = newsize;
2645 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002646 return 0;
2647}
Guido van Rossume5372401993-03-16 12:15:04 +00002648
2649/* Helpers for formatstring */
2650
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002651static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002652getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002653{
2654 int argidx = *p_argidx;
2655 if (argidx < arglen) {
2656 (*p_argidx)++;
2657 if (arglen < 0)
2658 return args;
2659 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002660 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002661 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002662 PyErr_SetString(PyExc_TypeError,
2663 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002664 return NULL;
2665}
2666
/* Format flag bits, one per printf-style flag character. */
#define F_LJUST 0x01	/* '-' */
#define F_SIGN  0x02	/* '+' */
#define F_BLANK 0x04	/* ' ' */
#define F_ALT   0x08	/* '#' */
#define F_ZERO  0x10	/* '0' */
2679
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002680static int
Fred Drakeba096332000-07-09 07:04:36 +00002681formatfloat(char *buf, size_t buflen, int flags,
2682 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002683{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002684 /* fmt = '%#.' + `prec` + `type`
2685 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002686 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002687 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002688 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002689 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002690 if (prec < 0)
2691 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002692 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2693 type = 'g';
2694 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002695 /* worst case length calc to ensure no buffer overrun:
2696 fmt = %#.<prec>g
2697 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002698 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002699 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2700 If prec=0 the effective precision is 1 (the leading digit is
2701 always given), therefore increase by one to 10+prec. */
2702 if (buflen <= (size_t)10 + (size_t)prec) {
2703 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002704 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002705 return -1;
2706 }
Guido van Rossume5372401993-03-16 12:15:04 +00002707 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002708 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002709}
2710
Tim Peters38fd5b62000-09-21 05:43:11 +00002711/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2712 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2713 * Python's regular ints.
2714 * Return value: a new PyString*, or NULL if error.
2715 * . *pbuf is set to point into it,
2716 * *plen set to the # of chars following that.
2717 * Caller must decref it when done using pbuf.
2718 * The string starting at *pbuf is of the form
2719 * "-"? ("0x" | "0X")? digit+
2720 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002721 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002722 * There will be at least prec digits, zero-filled on the left if
2723 * necessary to get that many.
2724 * val object to be converted
2725 * flags bitmask of format flags; only F_ALT is looked at
2726 * prec minimum number of digits; 0-fill on left if needed
2727 * type a character in [duoxX]; u acts the same as d
2728 *
2729 * CAUTION: o, x and X conversions on regular ints can never
2730 * produce a '-' sign, but can for Python's unbounded ints.
2731 */
2732PyObject*
2733_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2734 char **pbuf, int *plen)
2735{
2736 PyObject *result = NULL;
2737 char *buf;
2738 int i;
2739 int sign; /* 1 if '-', else 0 */
2740 int len; /* number of characters */
2741 int numdigits; /* len == numnondigits + numdigits */
2742 int numnondigits = 0;
2743
2744 switch (type) {
2745 case 'd':
2746 case 'u':
2747 result = val->ob_type->tp_str(val);
2748 break;
2749 case 'o':
2750 result = val->ob_type->tp_as_number->nb_oct(val);
2751 break;
2752 case 'x':
2753 case 'X':
2754 numnondigits = 2;
2755 result = val->ob_type->tp_as_number->nb_hex(val);
2756 break;
2757 default:
2758 assert(!"'type' not in [duoxX]");
2759 }
2760 if (!result)
2761 return NULL;
2762
2763 /* To modify the string in-place, there can only be one reference. */
2764 if (result->ob_refcnt != 1) {
2765 PyErr_BadInternalCall();
2766 return NULL;
2767 }
2768 buf = PyString_AsString(result);
2769 len = PyString_Size(result);
2770 if (buf[len-1] == 'L') {
2771 --len;
2772 buf[len] = '\0';
2773 }
2774 sign = buf[0] == '-';
2775 numnondigits += sign;
2776 numdigits = len - numnondigits;
2777 assert(numdigits > 0);
2778
Tim Petersfff53252001-04-12 18:38:48 +00002779 /* Get rid of base marker unless F_ALT */
2780 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002781 /* Need to skip 0x, 0X or 0. */
2782 int skipped = 0;
2783 switch (type) {
2784 case 'o':
2785 assert(buf[sign] == '0');
2786 /* If 0 is only digit, leave it alone. */
2787 if (numdigits > 1) {
2788 skipped = 1;
2789 --numdigits;
2790 }
2791 break;
2792 case 'x':
2793 case 'X':
2794 assert(buf[sign] == '0');
2795 assert(buf[sign + 1] == 'x');
2796 skipped = 2;
2797 numnondigits -= 2;
2798 break;
2799 }
2800 if (skipped) {
2801 buf += skipped;
2802 len -= skipped;
2803 if (sign)
2804 buf[0] = '-';
2805 }
2806 assert(len == numnondigits + numdigits);
2807 assert(numdigits > 0);
2808 }
2809
2810 /* Fill with leading zeroes to meet minimum width. */
2811 if (prec > numdigits) {
2812 PyObject *r1 = PyString_FromStringAndSize(NULL,
2813 numnondigits + prec);
2814 char *b1;
2815 if (!r1) {
2816 Py_DECREF(result);
2817 return NULL;
2818 }
2819 b1 = PyString_AS_STRING(r1);
2820 for (i = 0; i < numnondigits; ++i)
2821 *b1++ = *buf++;
2822 for (i = 0; i < prec - numdigits; i++)
2823 *b1++ = '0';
2824 for (i = 0; i < numdigits; i++)
2825 *b1++ = *buf++;
2826 *b1 = '\0';
2827 Py_DECREF(result);
2828 result = r1;
2829 buf = PyString_AS_STRING(result);
2830 len = numnondigits + prec;
2831 }
2832
2833 /* Fix up case for hex conversions. */
2834 switch (type) {
2835 case 'x':
2836 /* Need to convert all upper case letters to lower case. */
2837 for (i = 0; i < len; i++)
2838 if (buf[i] >= 'A' && buf[i] <= 'F')
2839 buf[i] += 'a'-'A';
2840 break;
2841 case 'X':
2842 /* Need to convert 0x to 0X (and -0x to -0X). */
2843 if (buf[sign + 1] == 'x')
2844 buf[sign + 1] = 'X';
2845 break;
2846 }
2847 *pbuf = buf;
2848 *plen = len;
2849 return result;
2850}
2851
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002852static int
Fred Drakeba096332000-07-09 07:04:36 +00002853formatint(char *buf, size_t buflen, int flags,
2854 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002855{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002856 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002857 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2858 + 1 + 1 = 24 */
2859 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002860 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002861 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002862 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002863 if (prec < 0)
2864 prec = 1;
2865 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002866 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002867 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002868 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002869 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002870 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002871 return -1;
2872 }
Guido van Rossume5372401993-03-16 12:15:04 +00002873 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002874 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2875 * but we want it (for consistency with other %#x conversions, and
2876 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002877 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2878 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2879 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002880 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002881 if (x == 0 &&
2882 (flags & F_ALT) &&
2883 (type == 'x' || type == 'X') &&
2884 buf[1] != (char)type) /* this last always true under std C */
2885 {
Tim Petersfff53252001-04-12 18:38:48 +00002886 memmove(buf+2, buf, strlen(buf) + 1);
2887 buf[0] = '0';
2888 buf[1] = (char)type;
2889 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002890 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002891}
2892
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002893static int
Fred Drakeba096332000-07-09 07:04:36 +00002894formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002895{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002896 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002897 if (PyString_Check(v)) {
2898 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002899 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002900 }
2901 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002902 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002903 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002904 }
2905 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002906 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002907}
2908
Guido van Rossum013142a1994-08-30 08:19:36 +00002909
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002910/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2911
2912 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2913 chars are formatted. XXX This is a magic number. Each formatting
2914 routine does bounds checking to ensure no overflow, but a better
2915 solution may be to malloc a buffer of appropriate size for each
2916 format. For now, the current solution is sufficient.
2917*/
/* Fully parenthesized so the cast stays one operand wherever the macro
   is expanded (e.g. directly after sizeof). */
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002919
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002920PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002921PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002922{
2923 char *fmt, *res;
2924 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002925 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002926 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002927 PyObject *dict = NULL;
2928 if (format == NULL || !PyString_Check(format) || args == NULL) {
2929 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002930 return NULL;
2931 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002932 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002933 fmt = PyString_AsString(format);
2934 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002935 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002936 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002937 if (result == NULL)
2938 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002939 res = PyString_AsString(result);
2940 if (PyTuple_Check(args)) {
2941 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002942 argidx = 0;
2943 }
2944 else {
2945 arglen = -1;
2946 argidx = -2;
2947 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002948 if (args->ob_type->tp_as_mapping)
2949 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002950 while (--fmtcnt >= 0) {
2951 if (*fmt != '%') {
2952 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002953 rescnt = fmtcnt + 100;
2954 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002955 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002956 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002957 res = PyString_AsString(result)
2958 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002959 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002960 }
2961 *res++ = *fmt++;
2962 }
2963 else {
2964 /* Got a format specifier */
2965 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002966 int width = -1;
2967 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00002968 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002969 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002970 PyObject *v = NULL;
2971 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002972 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002973 int sign;
2974 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002975 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002976 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002977 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002978
Guido van Rossumda9c2711996-12-05 21:58:58 +00002979 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002980 if (*fmt == '(') {
2981 char *keystart;
2982 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002983 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002984 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002985
2986 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002987 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002988 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002989 goto error;
2990 }
2991 ++fmt;
2992 --fmtcnt;
2993 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002994 /* Skip over balanced parentheses */
2995 while (pcount > 0 && --fmtcnt >= 0) {
2996 if (*fmt == ')')
2997 --pcount;
2998 else if (*fmt == '(')
2999 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00003000 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00003001 }
3002 keylen = fmt - keystart - 1;
3003 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003004 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003005 "incomplete format key");
3006 goto error;
3007 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003008 key = PyString_FromStringAndSize(keystart,
3009 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00003010 if (key == NULL)
3011 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00003012 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003013 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00003014 args_owned = 0;
3015 }
3016 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003017 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00003018 if (args == NULL) {
3019 goto error;
3020 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003021 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003022 arglen = -1;
3023 argidx = -2;
3024 }
Guido van Rossume5372401993-03-16 12:15:04 +00003025 while (--fmtcnt >= 0) {
3026 switch (c = *fmt++) {
3027 case '-': flags |= F_LJUST; continue;
3028 case '+': flags |= F_SIGN; continue;
3029 case ' ': flags |= F_BLANK; continue;
3030 case '#': flags |= F_ALT; continue;
3031 case '0': flags |= F_ZERO; continue;
3032 }
3033 break;
3034 }
3035 if (c == '*') {
3036 v = getnextarg(args, arglen, &argidx);
3037 if (v == NULL)
3038 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003039 if (!PyInt_Check(v)) {
3040 PyErr_SetString(PyExc_TypeError,
3041 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003042 goto error;
3043 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003044 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003045 if (width < 0) {
3046 flags |= F_LJUST;
3047 width = -width;
3048 }
Guido van Rossume5372401993-03-16 12:15:04 +00003049 if (--fmtcnt >= 0)
3050 c = *fmt++;
3051 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003052 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003053 width = c - '0';
3054 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003055 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003056 if (!isdigit(c))
3057 break;
3058 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003059 PyErr_SetString(
3060 PyExc_ValueError,
3061 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003062 goto error;
3063 }
3064 width = width*10 + (c - '0');
3065 }
3066 }
3067 if (c == '.') {
3068 prec = 0;
3069 if (--fmtcnt >= 0)
3070 c = *fmt++;
3071 if (c == '*') {
3072 v = getnextarg(args, arglen, &argidx);
3073 if (v == NULL)
3074 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003075 if (!PyInt_Check(v)) {
3076 PyErr_SetString(
3077 PyExc_TypeError,
3078 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003079 goto error;
3080 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003081 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003082 if (prec < 0)
3083 prec = 0;
3084 if (--fmtcnt >= 0)
3085 c = *fmt++;
3086 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003087 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003088 prec = c - '0';
3089 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003090 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003091 if (!isdigit(c))
3092 break;
3093 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003094 PyErr_SetString(
3095 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003096 "prec too big");
3097 goto error;
3098 }
3099 prec = prec*10 + (c - '0');
3100 }
3101 }
3102 } /* prec */
3103 if (fmtcnt >= 0) {
3104 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003105 if (--fmtcnt >= 0)
3106 c = *fmt++;
3107 }
3108 }
3109 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003110 PyErr_SetString(PyExc_ValueError,
3111 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003112 goto error;
3113 }
3114 if (c != '%') {
3115 v = getnextarg(args, arglen, &argidx);
3116 if (v == NULL)
3117 goto error;
3118 }
3119 sign = 0;
3120 fill = ' ';
3121 switch (c) {
3122 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003123 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003124 len = 1;
3125 break;
3126 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003127 case 'r':
3128 if (PyUnicode_Check(v)) {
3129 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003130 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003131 goto unicode;
3132 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003133 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003134 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003135 else
3136 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003137 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003138 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003139 if (!PyString_Check(temp)) {
3140 PyErr_SetString(PyExc_TypeError,
3141 "%s argument has non-string str()");
3142 goto error;
3143 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003144 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003145 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003146 if (prec >= 0 && len > prec)
3147 len = prec;
3148 break;
3149 case 'i':
3150 case 'd':
3151 case 'u':
3152 case 'o':
3153 case 'x':
3154 case 'X':
3155 if (c == 'i')
3156 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003157 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003158 temp = _PyString_FormatLong(v, flags,
3159 prec, c, &pbuf, &len);
3160 if (!temp)
3161 goto error;
3162 /* unbounded ints can always produce
3163 a sign character! */
3164 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003165 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003166 else {
3167 pbuf = formatbuf;
3168 len = formatint(pbuf, sizeof(formatbuf),
3169 flags, prec, c, v);
3170 if (len < 0)
3171 goto error;
3172 /* only d conversion is signed */
3173 sign = c == 'd';
3174 }
3175 if (flags & F_ZERO)
3176 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003177 break;
3178 case 'e':
3179 case 'E':
3180 case 'f':
3181 case 'g':
3182 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003183 pbuf = formatbuf;
3184 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003185 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003186 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003187 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003188 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003189 fill = '0';
3190 break;
3191 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003192 pbuf = formatbuf;
3193 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003194 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003195 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003196 break;
3197 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003198 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003199 "unsupported format character '%c' (0x%x) "
3200 "at index %i",
3201 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003202 goto error;
3203 }
3204 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003205 if (*pbuf == '-' || *pbuf == '+') {
3206 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003207 len--;
3208 }
3209 else if (flags & F_SIGN)
3210 sign = '+';
3211 else if (flags & F_BLANK)
3212 sign = ' ';
3213 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003214 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003215 }
3216 if (width < len)
3217 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003218 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003219 reslen -= rescnt;
3220 rescnt = width + fmtcnt + 100;
3221 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003222 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003223 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003224 res = PyString_AsString(result)
3225 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003226 }
3227 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003228 if (fill != ' ')
3229 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003230 rescnt--;
3231 if (width > len)
3232 width--;
3233 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003234 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3235 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003236 assert(pbuf[1] == c);
3237 if (fill != ' ') {
3238 *res++ = *pbuf++;
3239 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003240 }
Tim Petersfff53252001-04-12 18:38:48 +00003241 rescnt -= 2;
3242 width -= 2;
3243 if (width < 0)
3244 width = 0;
3245 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003246 }
3247 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003248 do {
3249 --rescnt;
3250 *res++ = fill;
3251 } while (--width > len);
3252 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003253 if (fill == ' ') {
3254 if (sign)
3255 *res++ = sign;
3256 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003257 (c == 'x' || c == 'X')) {
3258 assert(pbuf[0] == '0');
3259 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003260 *res++ = *pbuf++;
3261 *res++ = *pbuf++;
3262 }
3263 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003264 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003265 res += len;
3266 rescnt -= len;
3267 while (--width >= len) {
3268 --rescnt;
3269 *res++ = ' ';
3270 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003271 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003272 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003273 "not all arguments converted");
3274 goto error;
3275 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003276 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003277 } /* '%' */
3278 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003279 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003280 PyErr_SetString(PyExc_TypeError,
3281 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003282 goto error;
3283 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003284 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003285 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003286 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003287 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003288 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003289
3290 unicode:
3291 if (args_owned) {
3292 Py_DECREF(args);
3293 args_owned = 0;
3294 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003295 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003296 if (PyTuple_Check(orig_args) && argidx > 0) {
3297 PyObject *v;
3298 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3299 v = PyTuple_New(n);
3300 if (v == NULL)
3301 goto error;
3302 while (--n >= 0) {
3303 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3304 Py_INCREF(w);
3305 PyTuple_SET_ITEM(v, n, w);
3306 }
3307 args = v;
3308 } else {
3309 Py_INCREF(orig_args);
3310 args = orig_args;
3311 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003312 args_owned = 1;
3313 /* Take what we have of the result and let the Unicode formatting
3314 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003315 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003316 if (_PyString_Resize(&result, rescnt))
3317 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003318 fmtcnt = PyString_GET_SIZE(format) - \
3319 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003320 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3321 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003322 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003323 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003324 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003325 if (v == NULL)
3326 goto error;
3327 /* Paste what we have (result) to what the Unicode formatting
3328 function returned (v) and return the result (or error) */
3329 w = PyUnicode_Concat(result, v);
3330 Py_DECREF(result);
3331 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003332 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003333 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003334
Guido van Rossume5372401993-03-16 12:15:04 +00003335 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003336 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003337 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003338 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003339 }
Guido van Rossume5372401993-03-16 12:15:04 +00003340 return NULL;
3341}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003342
3343
3344#ifdef INTERN_STRINGS
3345
Barry Warsaw4df762f2000-08-16 23:41:01 +00003346/* This dictionary will leak at PyString_Fini() time. That's acceptable
3347 * because PyString_Fini() specifically frees interned strings that are
3348 * only referenced by this dictionary. The CVS log entry for revision 2.45
3349 * says:
3350 *
3351 * Change the Fini function to only remove otherwise unreferenced
3352 * strings from the interned table. There are references in
3353 * hard-to-find static variables all over the interpreter, and it's not
3354 * worth trying to get rid of all those; but "uninterning" isn't fair
3355 * either and may cause subtle failures later -- so we have to keep them
3356 * in the interned table.
3357 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003358static PyObject *interned;
3359
3360void
Fred Drakeba096332000-07-09 07:04:36 +00003361PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003362{
3363 register PyStringObject *s = (PyStringObject *)(*p);
3364 PyObject *t;
3365 if (s == NULL || !PyString_Check(s))
3366 Py_FatalError("PyString_InternInPlace: strings only please!");
3367 if ((t = s->ob_sinterned) != NULL) {
3368 if (t == (PyObject *)s)
3369 return;
3370 Py_INCREF(t);
3371 *p = t;
3372 Py_DECREF(s);
3373 return;
3374 }
3375 if (interned == NULL) {
3376 interned = PyDict_New();
3377 if (interned == NULL)
3378 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003379 }
3380 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3381 Py_INCREF(t);
3382 *p = s->ob_sinterned = t;
3383 Py_DECREF(s);
3384 return;
3385 }
3386 t = (PyObject *)s;
3387 if (PyDict_SetItem(interned, t, t) == 0) {
3388 s->ob_sinterned = t;
3389 return;
3390 }
3391 PyErr_Clear();
3392}
3393
3394
3395PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003396PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003397{
3398 PyObject *s = PyString_FromString(cp);
3399 if (s == NULL)
3400 return NULL;
3401 PyString_InternInPlace(&s);
3402 return s;
3403}
3404
3405#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003406
3407void
Fred Drakeba096332000-07-09 07:04:36 +00003408PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003409{
3410 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003411 for (i = 0; i < UCHAR_MAX + 1; i++) {
3412 Py_XDECREF(characters[i]);
3413 characters[i] = NULL;
3414 }
3415#ifndef DONT_SHARE_SHORT_STRINGS
3416 Py_XDECREF(nullstring);
3417 nullstring = NULL;
3418#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003419#ifdef INTERN_STRINGS
3420 if (interned) {
3421 int pos, changed;
3422 PyObject *key, *value;
3423 do {
3424 changed = 0;
3425 pos = 0;
3426 while (PyDict_Next(interned, &pos, &key, &value)) {
3427 if (key->ob_refcnt == 2 && key == value) {
3428 PyDict_DelItem(interned, key);
3429 changed = 1;
3430 }
3431 }
3432 } while (changed);
3433 }
3434#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003435}
Barry Warsawa903ad982001-02-23 16:40:48 +00003436
3437#ifdef INTERN_STRINGS
3438void _Py_ReleaseInternedStrings(void)
3439{
3440 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003441 fprintf(stderr, "releasing interned strings\n");
3442 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003443 Py_DECREF(interned);
3444 interned = NULL;
3445 }
3446}
3447#endif /* INTERN_STRINGS */