blob: bcf51474ec4d80b8aee5422c137939a1a5a35968 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000150PyObject *PyString_Decode(const char *s,
151 int size,
152 const char *encoding,
153 const char *errors)
154{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000155 PyObject *v, *str;
156
157 str = PyString_FromStringAndSize(s, size);
158 if (str == NULL)
159 return NULL;
160 v = PyString_AsDecodedString(str, encoding, errors);
161 Py_DECREF(str);
162 return v;
163}
164
165PyObject *PyString_AsDecodedObject(PyObject *str,
166 const char *encoding,
167 const char *errors)
168{
169 PyObject *v;
170
171 if (!PyString_Check(str)) {
172 PyErr_BadArgument();
173 goto onError;
174 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000175
176 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000177 encoding = PyUnicode_GetDefaultEncoding();
178
179 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000180 v = PyCodec_Decode(str, encoding, errors);
181 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000182 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000183
184 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000185
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000186 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000187 return NULL;
188}
189
190PyObject *PyString_AsDecodedString(PyObject *str,
191 const char *encoding,
192 const char *errors)
193{
194 PyObject *v;
195
196 v = PyString_AsDecodedObject(str, encoding, errors);
197 if (v == NULL)
198 goto onError;
199
200 /* Convert Unicode to a string using the default encoding */
201 if (PyUnicode_Check(v)) {
202 PyObject *temp = v;
203 v = PyUnicode_AsEncodedString(v, NULL, NULL);
204 Py_DECREF(temp);
205 if (v == NULL)
206 goto onError;
207 }
208 if (!PyString_Check(v)) {
209 PyErr_Format(PyExc_TypeError,
210 "decoder did not return a string object (type=%.400s)",
211 v->ob_type->tp_name);
212 Py_DECREF(v);
213 goto onError;
214 }
215
216 return v;
217
218 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000219 return NULL;
220}
221
222PyObject *PyString_Encode(const char *s,
223 int size,
224 const char *encoding,
225 const char *errors)
226{
227 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000228
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000229 str = PyString_FromStringAndSize(s, size);
230 if (str == NULL)
231 return NULL;
232 v = PyString_AsEncodedString(str, encoding, errors);
233 Py_DECREF(str);
234 return v;
235}
236
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000237PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000238 const char *encoding,
239 const char *errors)
240{
241 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000242
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000243 if (!PyString_Check(str)) {
244 PyErr_BadArgument();
245 goto onError;
246 }
247
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000248 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000249 encoding = PyUnicode_GetDefaultEncoding();
250
251 /* Encode via the codec registry */
252 v = PyCodec_Encode(str, encoding, errors);
253 if (v == NULL)
254 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000255
256 return v;
257
258 onError:
259 return NULL;
260}
261
262PyObject *PyString_AsEncodedString(PyObject *str,
263 const char *encoding,
264 const char *errors)
265{
266 PyObject *v;
267
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000268 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000269 if (v == NULL)
270 goto onError;
271
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000272 /* Convert Unicode to a string using the default encoding */
273 if (PyUnicode_Check(v)) {
274 PyObject *temp = v;
275 v = PyUnicode_AsEncodedString(v, NULL, NULL);
276 Py_DECREF(temp);
277 if (v == NULL)
278 goto onError;
279 }
280 if (!PyString_Check(v)) {
281 PyErr_Format(PyExc_TypeError,
282 "encoder did not return a string object (type=%.400s)",
283 v->ob_type->tp_name);
284 Py_DECREF(v);
285 goto onError;
286 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000287
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000288 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000289
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000290 onError:
291 return NULL;
292}
293
Guido van Rossum234f9421993-06-17 12:35:49 +0000294static void
Fred Drakeba096332000-07-09 07:04:36 +0000295string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000296{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000297 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000298}
299
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000300static int
301string_getsize(register PyObject *op)
302{
303 char *s;
304 int len;
305 if (PyString_AsStringAndSize(op, &s, &len))
306 return -1;
307 return len;
308}
309
310static /*const*/ char *
311string_getbuffer(register PyObject *op)
312{
313 char *s;
314 int len;
315 if (PyString_AsStringAndSize(op, &s, &len))
316 return NULL;
317 return s;
318}
319
Guido van Rossumd7047b31995-01-02 19:07:15 +0000320int
Fred Drakeba096332000-07-09 07:04:36 +0000321PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000323 if (!PyString_Check(op))
324 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326}
327
328/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000329PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000331 if (!PyString_Check(op))
332 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000333 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334}
335
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000336/* Internal API needed by PyString_AsStringAndSize(): */
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000337extern
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000338PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
339 const char *errors);
340
341int
342PyString_AsStringAndSize(register PyObject *obj,
343 register char **s,
344 register int *len)
345{
346 if (s == NULL) {
347 PyErr_BadInternalCall();
348 return -1;
349 }
350
351 if (!PyString_Check(obj)) {
352 if (PyUnicode_Check(obj)) {
353 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
354 if (obj == NULL)
355 return -1;
356 }
357 else {
358 PyErr_Format(PyExc_TypeError,
359 "expected string or Unicode object, "
360 "%.200s found", obj->ob_type->tp_name);
361 return -1;
362 }
363 }
364
365 *s = PyString_AS_STRING(obj);
366 if (len != NULL)
367 *len = PyString_GET_SIZE(obj);
368 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
369 PyErr_SetString(PyExc_TypeError,
370 "expected string without null bytes");
371 return -1;
372 }
373 return 0;
374}
375
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000376/* Methods */
377
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000378static int
Fred Drakeba096332000-07-09 07:04:36 +0000379string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380{
381 int i;
382 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000383 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000384 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000385 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000386 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000387 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000388 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000389
Thomas Wouters7e474022000-07-16 12:04:32 +0000390 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000391 quote = '\'';
392 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
393 quote = '"';
394
395 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000396 for (i = 0; i < op->ob_size; i++) {
397 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000398 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000399 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000400 else if (c == '\t')
401 fprintf(fp, "\\t");
402 else if (c == '\n')
403 fprintf(fp, "\\n");
404 else if (c == '\r')
405 fprintf(fp, "\\r");
406 else if (c < ' ' || c >= 0x7f)
407 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000408 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000409 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000411 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000412 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413}
414
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000416string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000417{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000418 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
419 PyObject *v;
420 if (newsize > INT_MAX) {
421 PyErr_SetString(PyExc_OverflowError,
422 "string is too large to make repr");
423 }
424 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000425 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000426 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427 }
428 else {
429 register int i;
430 register char c;
431 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000432 int quote;
433
Thomas Wouters7e474022000-07-16 12:04:32 +0000434 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000435 quote = '\'';
436 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
437 quote = '"';
438
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000440 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000441 for (i = 0; i < op->ob_size; i++) {
442 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000443 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000445 else if (c == '\t')
446 *p++ = '\\', *p++ = 't';
447 else if (c == '\n')
448 *p++ = '\\', *p++ = 'n';
449 else if (c == '\r')
450 *p++ = '\\', *p++ = 'r';
451 else if (c < ' ' || c >= 0x7f) {
452 sprintf(p, "\\x%02x", c & 0xff);
453 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 }
455 else
456 *p++ = c;
457 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000458 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000460 _PyString_Resize(
461 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000462 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000463 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000464}
465
Guido van Rossum189f1df2001-05-01 16:51:53 +0000466static PyObject *
467string_str(PyObject *s)
468{
469 Py_INCREF(s);
470 return s;
471}
472
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473static int
Fred Drakeba096332000-07-09 07:04:36 +0000474string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000475{
476 return a->ob_size;
477}
478
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000479static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000480string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481{
482 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000483 register PyStringObject *op;
484 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000485 if (PyUnicode_Check(bb))
486 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000487 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000488 "cannot add type \"%.200s\" to string",
489 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000490 return NULL;
491 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000492#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493 /* Optimize cases with empty left or right operand */
494 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000495 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000496 return bb;
497 }
498 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000499 Py_INCREF(a);
500 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000501 }
502 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000503 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000504 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000505 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000506 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000508 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000509#ifdef CACHE_HASH
510 op->ob_shash = -1;
511#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000512#ifdef INTERN_STRINGS
513 op->ob_sinterned = NULL;
514#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000515 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
516 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
517 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000518 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519#undef b
520}
521
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000522static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000523string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524{
525 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000526 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000527 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000528 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529 if (n < 0)
530 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000531 /* watch out for overflows: the size can overflow int,
532 * and the # of bytes needed can overflow size_t
533 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000535 if (n && size / n != a->ob_size) {
536 PyErr_SetString(PyExc_OverflowError,
537 "repeated string is too long");
538 return NULL;
539 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000541 Py_INCREF(a);
542 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000543 }
Tim Peters8f422462000-09-09 06:13:41 +0000544 nbytes = size * sizeof(char);
545 if (nbytes / sizeof(char) != (size_t)size ||
546 nbytes + sizeof(PyStringObject) <= nbytes) {
547 PyErr_SetString(PyExc_OverflowError,
548 "repeated string is too long");
549 return NULL;
550 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000551 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000552 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000553 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000554 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000555 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000556#ifdef CACHE_HASH
557 op->ob_shash = -1;
558#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000559#ifdef INTERN_STRINGS
560 op->ob_sinterned = NULL;
561#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000562 for (i = 0; i < size; i += a->ob_size)
563 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
564 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566}
567
568/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
569
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000570static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000571string_slice(register PyStringObject *a, register int i, register int j)
572 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573{
574 if (i < 0)
575 i = 0;
576 if (j < 0)
577 j = 0; /* Avoid signed/unsigned bug in next line */
578 if (j > a->ob_size)
579 j = a->ob_size;
580 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000581 Py_INCREF(a);
582 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 }
584 if (j < i)
585 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000587}
588
Guido van Rossum9284a572000-03-07 15:53:43 +0000589static int
Fred Drakeba096332000-07-09 07:04:36 +0000590string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000591{
592 register char *s, *end;
593 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000594 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000595 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000596 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000597 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000598 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000599 return -1;
600 }
601 c = PyString_AsString(el)[0];
602 s = PyString_AsString(a);
603 end = s + PyString_Size(a);
604 while (s < end) {
605 if (c == *s++)
606 return 1;
607 }
608 return 0;
609}
610
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000612string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000614 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000615 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000616 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000617 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000618 return NULL;
619 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000620 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000621 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000622 if (v == NULL)
623 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000624 else {
625#ifdef COUNT_ALLOCS
626 one_strings++;
627#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000628 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000629 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000630 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000631}
632
Martin v. Löwiscd353062001-05-24 16:56:35 +0000633static PyObject*
634string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000635{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000636 int c;
637 int len_a, len_b;
638 int min_len;
639 PyObject *result;
640
641 /* One of the objects is a string object. Make sure the
642 other one is one, too. */
643 if (a->ob_type != b->ob_type) {
644 result = Py_NotImplemented;
645 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000646 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000647 if (a == b) {
648 switch (op) {
649 case Py_EQ:case Py_LE:case Py_GE:
650 result = Py_True;
651 goto out;
652 case Py_NE:case Py_LT:case Py_GT:
653 result = Py_False;
654 goto out;
655 }
656 }
657 if (op == Py_EQ) {
658 /* Supporting Py_NE here as well does not save
659 much time, since Py_NE is rarely used. */
660 if (a->ob_size == b->ob_size
661 && (a->ob_sval[0] == b->ob_sval[0]
662 && memcmp(a->ob_sval, b->ob_sval,
663 a->ob_size) == 0)) {
664 result = Py_True;
665 } else {
666 result = Py_False;
667 }
668 goto out;
669 }
670 len_a = a->ob_size; len_b = b->ob_size;
671 min_len = (len_a < len_b) ? len_a : len_b;
672 if (min_len > 0) {
673 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
674 if (c==0)
675 c = memcmp(a->ob_sval, b->ob_sval, min_len);
676 }else
677 c = 0;
678 if (c == 0)
679 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
680 switch (op) {
681 case Py_LT: c = c < 0; break;
682 case Py_LE: c = c <= 0; break;
683 case Py_EQ: assert(0); break; /* unreachable */
684 case Py_NE: c = c != 0; break;
685 case Py_GT: c = c > 0; break;
686 case Py_GE: c = c >= 0; break;
687 default:
688 result = Py_NotImplemented;
689 goto out;
690 }
691 result = c ? Py_True : Py_False;
692 out:
693 Py_INCREF(result);
694 return result;
695}
696
697int
698_PyString_Eq(PyObject *o1, PyObject *o2)
699{
700 PyStringObject *a, *b;
701 a = (PyStringObject*)o1;
702 b = (PyStringObject*)o2;
703 return a->ob_size == b->ob_size
704 && *a->ob_sval == *b->ob_sval
705 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706}
707
Guido van Rossum9bfef441993-03-29 10:43:31 +0000708static long
Fred Drakeba096332000-07-09 07:04:36 +0000709string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000710{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000711 register int len;
712 register unsigned char *p;
713 register long x;
714
715#ifdef CACHE_HASH
716 if (a->ob_shash != -1)
717 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000718#ifdef INTERN_STRINGS
719 if (a->ob_sinterned != NULL)
720 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000722#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000723#endif
724 len = a->ob_size;
725 p = (unsigned char *) a->ob_sval;
726 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000727 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000728 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000729 x ^= a->ob_size;
730 if (x == -1)
731 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000732#ifdef CACHE_HASH
733 a->ob_shash = x;
734#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000735 return x;
736}
737
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000738static int
Fred Drakeba096332000-07-09 07:04:36 +0000739string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000740{
741 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000742 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000743 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000744 return -1;
745 }
746 *ptr = (void *)self->ob_sval;
747 return self->ob_size;
748}
749
/* Write-buffer slot: always fails.  Sets TypeError and returns -1
   without looking at any of its arguments. */
static int
string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
{
    PyErr_SetString(PyExc_TypeError,
                    "Cannot use string as modifiable buffer");
    return -1;
}
757
758static int
Fred Drakeba096332000-07-09 07:04:36 +0000759string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000760{
761 if ( lenp )
762 *lenp = self->ob_size;
763 return 1;
764}
765
Guido van Rossum1db70701998-10-08 02:18:52 +0000766static int
Fred Drakeba096332000-07-09 07:04:36 +0000767string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000768{
769 if ( index != 0 ) {
770 PyErr_SetString(PyExc_SystemError,
771 "accessing non-existent string segment");
772 return -1;
773 }
774 *ptr = self->ob_sval;
775 return self->ob_size;
776}
777
/* Sequence protocol table for string objects.  The two assignment slots
   are left 0: no item or slice assignment handler is installed. */
static PySequenceMethods string_as_sequence = {
    (inquiry)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (intargfunc)string_repeat, /*sq_repeat*/
    (intargfunc)string_item, /*sq_item*/
    (intintargfunc)string_slice, /*sq_slice*/
    0, /*sq_ass_item*/
    0, /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
788
/* Buffer protocol table for string objects: read, write (always fails),
   segment count, and character-buffer accessors, in that order. */
static PyBufferProcs string_as_buffer = {
    (getreadbufferproc)string_buffer_getreadbuf,
    (getwritebufferproc)string_buffer_getwritebuf,
    (getsegcountproc)string_buffer_getsegcount,
    (getcharbufferproc)string_buffer_getcharbuf,
};
795
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000796
797
/* Values for the striptype argument of do_strip(): which end(s) of the
   string have whitespace removed. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
801
802
803static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000804split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000805{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000806 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000807 PyObject* item;
808 PyObject *list = PyList_New(0);
809
810 if (list == NULL)
811 return NULL;
812
Guido van Rossum4c08d552000-03-10 22:55:18 +0000813 for (i = j = 0; i < len; ) {
814 while (i < len && isspace(Py_CHARMASK(s[i])))
815 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000816 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817 while (i < len && !isspace(Py_CHARMASK(s[i])))
818 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000819 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000820 if (maxsplit-- <= 0)
821 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000822 item = PyString_FromStringAndSize(s+j, (int)(i-j));
823 if (item == NULL)
824 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 err = PyList_Append(list, item);
826 Py_DECREF(item);
827 if (err < 0)
828 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000829 while (i < len && isspace(Py_CHARMASK(s[i])))
830 i++;
831 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000832 }
833 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000834 if (j < len) {
835 item = PyString_FromStringAndSize(s+j, (int)(len - j));
836 if (item == NULL)
837 goto finally;
838 err = PyList_Append(list, item);
839 Py_DECREF(item);
840 if (err < 0)
841 goto finally;
842 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000843 return list;
844 finally:
845 Py_DECREF(list);
846 return NULL;
847}
848
849
850static char split__doc__[] =
851"S.split([sep [,maxsplit]]) -> list of strings\n\
852\n\
853Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000854delimiter string. If maxsplit is given, at most maxsplit\n\
855splits are done. If sep is not specified, any whitespace string\n\
856is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000857
858static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000859string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000860{
861 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000862 int maxsplit = -1;
863 const char *s = PyString_AS_STRING(self), *sub;
864 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000865
Guido van Rossum4c08d552000-03-10 22:55:18 +0000866 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000867 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000868 if (maxsplit < 0)
869 maxsplit = INT_MAX;
870 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000871 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000872 if (PyString_Check(subobj)) {
873 sub = PyString_AS_STRING(subobj);
874 n = PyString_GET_SIZE(subobj);
875 }
876 else if (PyUnicode_Check(subobj))
877 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
878 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
879 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000880 if (n == 0) {
881 PyErr_SetString(PyExc_ValueError, "empty separator");
882 return NULL;
883 }
884
885 list = PyList_New(0);
886 if (list == NULL)
887 return NULL;
888
889 i = j = 0;
890 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000891 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000892 if (maxsplit-- <= 0)
893 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000894 item = PyString_FromStringAndSize(s+j, (int)(i-j));
895 if (item == NULL)
896 goto fail;
897 err = PyList_Append(list, item);
898 Py_DECREF(item);
899 if (err < 0)
900 goto fail;
901 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000902 }
903 else
904 i++;
905 }
906 item = PyString_FromStringAndSize(s+j, (int)(len-j));
907 if (item == NULL)
908 goto fail;
909 err = PyList_Append(list, item);
910 Py_DECREF(item);
911 if (err < 0)
912 goto fail;
913
914 return list;
915
916 fail:
917 Py_DECREF(list);
918 return NULL;
919}
920
921
922static char join__doc__[] =
923"S.join(sequence) -> string\n\
924\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000925Return a string which is the concatenation of the strings in the\n\
926sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000927
928static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000929string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000930{
931 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000932 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000933 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000934 char *p;
935 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000936 size_t sz = 0;
937 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000938 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000939
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000940 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000941 return NULL;
942
Tim Peters19fe14e2001-01-19 03:03:47 +0000943 seq = PySequence_Fast(orig, "");
944 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000945 if (PyErr_ExceptionMatches(PyExc_TypeError))
946 PyErr_Format(PyExc_TypeError,
947 "sequence expected, %.80s found",
948 orig->ob_type->tp_name);
949 return NULL;
950 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000951
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000952 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000953 if (seqlen == 0) {
954 Py_DECREF(seq);
955 return PyString_FromString("");
956 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000957 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000958 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000959 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
960 PyErr_Format(PyExc_TypeError,
961 "sequence item 0: expected string,"
962 " %.80s found",
963 item->ob_type->tp_name);
964 Py_DECREF(seq);
965 return NULL;
966 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000967 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000968 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000969 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000970 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000971
Tim Peters19fe14e2001-01-19 03:03:47 +0000972 /* There are at least two things to join. Do a pre-pass to figure out
973 * the total amount of space we'll need (sz), see whether any argument
974 * is absurd, and defer to the Unicode join if appropriate.
975 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000976 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000977 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000978 item = PySequence_Fast_GET_ITEM(seq, i);
979 if (!PyString_Check(item)){
980 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000981 /* Defer to Unicode join.
982 * CAUTION: There's no gurantee that the
983 * original sequence can be iterated over
984 * again, so we must pass seq here.
985 */
986 PyObject *result;
987 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000988 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +0000989 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000990 }
991 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000992 "sequence item %i: expected string,"
993 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000994 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000995 Py_DECREF(seq);
996 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000997 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000998 sz += PyString_GET_SIZE(item);
999 if (i != 0)
1000 sz += seplen;
1001 if (sz < old_sz || sz > INT_MAX) {
1002 PyErr_SetString(PyExc_OverflowError,
1003 "join() is too long for a Python string");
1004 Py_DECREF(seq);
1005 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001007 }
1008
1009 /* Allocate result space. */
1010 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1011 if (res == NULL) {
1012 Py_DECREF(seq);
1013 return NULL;
1014 }
1015
1016 /* Catenate everything. */
1017 p = PyString_AS_STRING(res);
1018 for (i = 0; i < seqlen; ++i) {
1019 size_t n;
1020 item = PySequence_Fast_GET_ITEM(seq, i);
1021 n = PyString_GET_SIZE(item);
1022 memcpy(p, PyString_AS_STRING(item), n);
1023 p += n;
1024 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001025 memcpy(p, sep, seplen);
1026 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001027 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001029
Jeremy Hylton49048292000-07-11 03:28:17 +00001030 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001031 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001032}
1033
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001034static long
Fred Drakeba096332000-07-09 07:04:36 +00001035string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001036{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001037 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001038 int len = PyString_GET_SIZE(self);
1039 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001040 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001041
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001042 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001043 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001044 return -2;
1045 if (PyString_Check(subobj)) {
1046 sub = PyString_AS_STRING(subobj);
1047 n = PyString_GET_SIZE(subobj);
1048 }
1049 else if (PyUnicode_Check(subobj))
1050 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
1051 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052 return -2;
1053
1054 if (last > len)
1055 last = len;
1056 if (last < 0)
1057 last += len;
1058 if (last < 0)
1059 last = 0;
1060 if (i < 0)
1061 i += len;
1062 if (i < 0)
1063 i = 0;
1064
Guido van Rossum4c08d552000-03-10 22:55:18 +00001065 if (dir > 0) {
1066 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001067 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001068 last -= n;
1069 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001070 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001071 return (long)i;
1072 }
1073 else {
1074 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001075
Guido van Rossum4c08d552000-03-10 22:55:18 +00001076 if (n == 0 && i <= last)
1077 return (long)last;
1078 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001079 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001080 return (long)j;
1081 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001082
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001083 return -1;
1084}
1085
1086
1087static char find__doc__[] =
1088"S.find(sub [,start [,end]]) -> int\n\
1089\n\
1090Return the lowest index in S where substring sub is found,\n\
1091such that sub is contained within s[start,end]. Optional\n\
1092arguments start and end are interpreted as in slice notation.\n\
1093\n\
1094Return -1 on failure.";
1095
1096static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001097string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001098{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001099 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001100 if (result == -2)
1101 return NULL;
1102 return PyInt_FromLong(result);
1103}
1104
1105
1106static char index__doc__[] =
1107"S.index(sub [,start [,end]]) -> int\n\
1108\n\
1109Like S.find() but raise ValueError when the substring is not found.";
1110
1111static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001112string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001114 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001115 if (result == -2)
1116 return NULL;
1117 if (result == -1) {
1118 PyErr_SetString(PyExc_ValueError,
1119 "substring not found in string.index");
1120 return NULL;
1121 }
1122 return PyInt_FromLong(result);
1123}
1124
1125
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001126static char rfind__doc__[] =
1127"S.rfind(sub [,start [,end]]) -> int\n\
1128\n\
1129Return the highest index in S where substring sub is found,\n\
1130such that sub is contained within s[start,end]. Optional\n\
1131arguments start and end are interpreted as in slice notation.\n\
1132\n\
1133Return -1 on failure.";
1134
1135static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001136string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001137{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001138 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001139 if (result == -2)
1140 return NULL;
1141 return PyInt_FromLong(result);
1142}
1143
1144
1145static char rindex__doc__[] =
1146"S.rindex(sub [,start [,end]]) -> int\n\
1147\n\
1148Like S.rfind() but raise ValueError when the substring is not found.";
1149
1150static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001151string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001152{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001153 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001154 if (result == -2)
1155 return NULL;
1156 if (result == -1) {
1157 PyErr_SetString(PyExc_ValueError,
1158 "substring not found in string.rindex");
1159 return NULL;
1160 }
1161 return PyInt_FromLong(result);
1162}
1163
1164
1165static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001166do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001167{
1168 char *s = PyString_AS_STRING(self);
1169 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001170
Guido van Rossum43713e52000-02-29 13:59:29 +00001171 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001172 return NULL;
1173
1174 i = 0;
1175 if (striptype != RIGHTSTRIP) {
1176 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1177 i++;
1178 }
1179 }
1180
1181 j = len;
1182 if (striptype != LEFTSTRIP) {
1183 do {
1184 j--;
1185 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1186 j++;
1187 }
1188
1189 if (i == 0 && j == len) {
1190 Py_INCREF(self);
1191 return (PyObject*)self;
1192 }
1193 else
1194 return PyString_FromStringAndSize(s+i, j-i);
1195}
1196
1197
static char strip__doc__[] =
"S.strip() -> string\n\
\n\
Return a copy of the string S with leading and trailing\n\
whitespace removed.";

/* S.strip(): do_strip() applied to both ends. */
static PyObject *
string_strip(PyStringObject *self, PyObject *args)
{
    return do_strip(self, args, BOTHSTRIP);
}
1209
1210
static char lstrip__doc__[] =
"S.lstrip() -> string\n\
\n\
Return a copy of the string S with leading whitespace removed.";

/* S.lstrip(): do_strip() applied to the left end only. */
static PyObject *
string_lstrip(PyStringObject *self, PyObject *args)
{
    return do_strip(self, args, LEFTSTRIP);
}
1221
1222
static char rstrip__doc__[] =
"S.rstrip() -> string\n\
\n\
Return a copy of the string S with trailing whitespace removed.";

/* S.rstrip(): do_strip() applied to the right end only. */
static PyObject *
string_rstrip(PyStringObject *self, PyObject *args)
{
    return do_strip(self, args, RIGHTSTRIP);
}
1233
1234
1235static char lower__doc__[] =
1236"S.lower() -> string\n\
1237\n\
1238Return a copy of the string S converted to lowercase.";
1239
1240static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001241string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001242{
1243 char *s = PyString_AS_STRING(self), *s_new;
1244 int i, n = PyString_GET_SIZE(self);
1245 PyObject *new;
1246
Guido van Rossum43713e52000-02-29 13:59:29 +00001247 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001248 return NULL;
1249 new = PyString_FromStringAndSize(NULL, n);
1250 if (new == NULL)
1251 return NULL;
1252 s_new = PyString_AsString(new);
1253 for (i = 0; i < n; i++) {
1254 int c = Py_CHARMASK(*s++);
1255 if (isupper(c)) {
1256 *s_new = tolower(c);
1257 } else
1258 *s_new = c;
1259 s_new++;
1260 }
1261 return new;
1262}
1263
1264
1265static char upper__doc__[] =
1266"S.upper() -> string\n\
1267\n\
1268Return a copy of the string S converted to uppercase.";
1269
1270static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001271string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272{
1273 char *s = PyString_AS_STRING(self), *s_new;
1274 int i, n = PyString_GET_SIZE(self);
1275 PyObject *new;
1276
Guido van Rossum43713e52000-02-29 13:59:29 +00001277 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001278 return NULL;
1279 new = PyString_FromStringAndSize(NULL, n);
1280 if (new == NULL)
1281 return NULL;
1282 s_new = PyString_AsString(new);
1283 for (i = 0; i < n; i++) {
1284 int c = Py_CHARMASK(*s++);
1285 if (islower(c)) {
1286 *s_new = toupper(c);
1287 } else
1288 *s_new = c;
1289 s_new++;
1290 }
1291 return new;
1292}
1293
1294
Guido van Rossum4c08d552000-03-10 22:55:18 +00001295static char title__doc__[] =
1296"S.title() -> string\n\
1297\n\
1298Return a titlecased version of S, i.e. words start with uppercase\n\
1299characters, all remaining cased characters have lowercase.";
1300
1301static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001302string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001303{
1304 char *s = PyString_AS_STRING(self), *s_new;
1305 int i, n = PyString_GET_SIZE(self);
1306 int previous_is_cased = 0;
1307 PyObject *new;
1308
1309 if (!PyArg_ParseTuple(args, ":title"))
1310 return NULL;
1311 new = PyString_FromStringAndSize(NULL, n);
1312 if (new == NULL)
1313 return NULL;
1314 s_new = PyString_AsString(new);
1315 for (i = 0; i < n; i++) {
1316 int c = Py_CHARMASK(*s++);
1317 if (islower(c)) {
1318 if (!previous_is_cased)
1319 c = toupper(c);
1320 previous_is_cased = 1;
1321 } else if (isupper(c)) {
1322 if (previous_is_cased)
1323 c = tolower(c);
1324 previous_is_cased = 1;
1325 } else
1326 previous_is_cased = 0;
1327 *s_new++ = c;
1328 }
1329 return new;
1330}
1331
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332static char capitalize__doc__[] =
1333"S.capitalize() -> string\n\
1334\n\
1335Return a copy of the string S with only its first character\n\
1336capitalized.";
1337
1338static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001339string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001340{
1341 char *s = PyString_AS_STRING(self), *s_new;
1342 int i, n = PyString_GET_SIZE(self);
1343 PyObject *new;
1344
Guido van Rossum43713e52000-02-29 13:59:29 +00001345 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001346 return NULL;
1347 new = PyString_FromStringAndSize(NULL, n);
1348 if (new == NULL)
1349 return NULL;
1350 s_new = PyString_AsString(new);
1351 if (0 < n) {
1352 int c = Py_CHARMASK(*s++);
1353 if (islower(c))
1354 *s_new = toupper(c);
1355 else
1356 *s_new = c;
1357 s_new++;
1358 }
1359 for (i = 1; i < n; i++) {
1360 int c = Py_CHARMASK(*s++);
1361 if (isupper(c))
1362 *s_new = tolower(c);
1363 else
1364 *s_new = c;
1365 s_new++;
1366 }
1367 return new;
1368}
1369
1370
1371static char count__doc__[] =
1372"S.count(sub[, start[, end]]) -> int\n\
1373\n\
1374Return the number of occurrences of substring sub in string\n\
1375S[start:end]. Optional arguments start and end are\n\
1376interpreted as in slice notation.";
1377
1378static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001379string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001380{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001381 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382 int len = PyString_GET_SIZE(self), n;
1383 int i = 0, last = INT_MAX;
1384 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001385 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001386
Guido van Rossumc6821402000-05-08 14:08:05 +00001387 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1388 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001389 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001390
Guido van Rossum4c08d552000-03-10 22:55:18 +00001391 if (PyString_Check(subobj)) {
1392 sub = PyString_AS_STRING(subobj);
1393 n = PyString_GET_SIZE(subobj);
1394 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001395 else if (PyUnicode_Check(subobj)) {
1396 int count;
1397 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1398 if (count == -1)
1399 return NULL;
1400 else
1401 return PyInt_FromLong((long) count);
1402 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001403 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1404 return NULL;
1405
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001406 if (last > len)
1407 last = len;
1408 if (last < 0)
1409 last += len;
1410 if (last < 0)
1411 last = 0;
1412 if (i < 0)
1413 i += len;
1414 if (i < 0)
1415 i = 0;
1416 m = last + 1 - n;
1417 if (n == 0)
1418 return PyInt_FromLong((long) (m-i));
1419
1420 r = 0;
1421 while (i < m) {
1422 if (!memcmp(s+i, sub, n)) {
1423 r++;
1424 i += n;
1425 } else {
1426 i++;
1427 }
1428 }
1429 return PyInt_FromLong((long) r);
1430}
1431
1432
1433static char swapcase__doc__[] =
1434"S.swapcase() -> string\n\
1435\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001436Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001437converted to lowercase and vice versa.";
1438
1439static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001440string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001441{
1442 char *s = PyString_AS_STRING(self), *s_new;
1443 int i, n = PyString_GET_SIZE(self);
1444 PyObject *new;
1445
Guido van Rossum43713e52000-02-29 13:59:29 +00001446 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001447 return NULL;
1448 new = PyString_FromStringAndSize(NULL, n);
1449 if (new == NULL)
1450 return NULL;
1451 s_new = PyString_AsString(new);
1452 for (i = 0; i < n; i++) {
1453 int c = Py_CHARMASK(*s++);
1454 if (islower(c)) {
1455 *s_new = toupper(c);
1456 }
1457 else if (isupper(c)) {
1458 *s_new = tolower(c);
1459 }
1460 else
1461 *s_new = c;
1462 s_new++;
1463 }
1464 return new;
1465}
1466
1467
1468static char translate__doc__[] =
1469"S.translate(table [,deletechars]) -> string\n\
1470\n\
1471Return a copy of the string S, where all characters occurring\n\
1472in the optional argument deletechars are removed, and the\n\
1473remaining characters have been mapped through the given\n\
1474translation table, which must be a string of length 256.";
1475
1476static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001477string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001478{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001479 register char *input, *output;
1480 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001481 register int i, c, changed = 0;
1482 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001483 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001484 int inlen, tablen, dellen = 0;
1485 PyObject *result;
1486 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001487 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001488
Guido van Rossum4c08d552000-03-10 22:55:18 +00001489 if (!PyArg_ParseTuple(args, "O|O:translate",
1490 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001491 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001492
1493 if (PyString_Check(tableobj)) {
1494 table1 = PyString_AS_STRING(tableobj);
1495 tablen = PyString_GET_SIZE(tableobj);
1496 }
1497 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001498 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001499 parameter; instead a mapping to None will cause characters
1500 to be deleted. */
1501 if (delobj != NULL) {
1502 PyErr_SetString(PyExc_TypeError,
1503 "deletions are implemented differently for unicode");
1504 return NULL;
1505 }
1506 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1507 }
1508 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510
1511 if (delobj != NULL) {
1512 if (PyString_Check(delobj)) {
1513 del_table = PyString_AS_STRING(delobj);
1514 dellen = PyString_GET_SIZE(delobj);
1515 }
1516 else if (PyUnicode_Check(delobj)) {
1517 PyErr_SetString(PyExc_TypeError,
1518 "deletions are implemented differently for unicode");
1519 return NULL;
1520 }
1521 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1522 return NULL;
1523
1524 if (tablen != 256) {
1525 PyErr_SetString(PyExc_ValueError,
1526 "translation table must be 256 characters long");
1527 return NULL;
1528 }
1529 }
1530 else {
1531 del_table = NULL;
1532 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001533 }
1534
1535 table = table1;
1536 inlen = PyString_Size(input_obj);
1537 result = PyString_FromStringAndSize((char *)NULL, inlen);
1538 if (result == NULL)
1539 return NULL;
1540 output_start = output = PyString_AsString(result);
1541 input = PyString_AsString(input_obj);
1542
1543 if (dellen == 0) {
1544 /* If no deletions are required, use faster code */
1545 for (i = inlen; --i >= 0; ) {
1546 c = Py_CHARMASK(*input++);
1547 if (Py_CHARMASK((*output++ = table[c])) != c)
1548 changed = 1;
1549 }
1550 if (changed)
1551 return result;
1552 Py_DECREF(result);
1553 Py_INCREF(input_obj);
1554 return input_obj;
1555 }
1556
1557 for (i = 0; i < 256; i++)
1558 trans_table[i] = Py_CHARMASK(table[i]);
1559
1560 for (i = 0; i < dellen; i++)
1561 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1562
1563 for (i = inlen; --i >= 0; ) {
1564 c = Py_CHARMASK(*input++);
1565 if (trans_table[c] != -1)
1566 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1567 continue;
1568 changed = 1;
1569 }
1570 if (!changed) {
1571 Py_DECREF(result);
1572 Py_INCREF(input_obj);
1573 return input_obj;
1574 }
1575 /* Fix the size of the resulting string */
1576 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1577 return NULL;
1578 return result;
1579}
1580
1581
1582/* What follows is used for implementing replace(). Perry Stoll. */
1583
/*
  mymemfind

  strstr-like search over arbitrary memory blocks.

  Looks for the first place in the LEN bytes at MEM where the PAT_LEN
  bytes at PAT occur and returns that index, or -1 if there is none.
  A pattern longer than the memory block can never match, so -1 is
  returned in that case too.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot begin within the final pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos = 0;

    while (pos <= last_start) {
        /* First-byte test avoids most memcmp() calls. */
        if (mem[pos] == pat[0] && memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
        pos++;
    }
    return -1;
}
1609
/*
   mymemcnt

   Return the number of non-overlapping occurrences of PAT (PAT_LEN
   bytes) in MEM (LEN bytes), scanning left to right:
       mem=1111  and pat=11 gives 2;
       mem=11111 and pat=11 also gives 2.

   An empty (or negative-length) pattern is reported as occurring 0
   times.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int offset;
    int nfound = 0;

    /* An empty pattern matches at offset 0 without consuming any input,
       so the loop below would never terminate. */
    if (pat_len <= 0)
        return 0;

    while (len >= 0) {
        offset = mymemfind(mem, len, pat, pat_len);
        if (offset == -1)
            break;
        /* Resume just past this match so occurrences cannot overlap. */
        mem += offset + pat_len;
        len -= offset + pat_len;
        nfound++;
    }
    return nfound;
}
1633
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced, scanning
   left to right; a negative COUNT means "replace all".

   If STR is empty, if PAT is longer than STR, or if no replacement is
   performed (PAT does not occur, or COUNT is 0), then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller must release it.

   PAT_LEN is expected to be > 0: string_replace() rejects empty
   patterns before calling, and the search helpers read pat[0].

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result length
       does not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
	     const char *pat, int pat_len,	/* pattern string to find */
	     const char *sub, int sub_len,	/* substitution string */
	     int count,				/* number of replacements */
	     int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Every replacement changes the length by sub_len - pat_len.  Only
	   growth can overflow: when shrinking, nfound*pat_len <= len.
	   Refuse up front rather than allocate a too-small buffer that the
	   copy loop below would overrun. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
1718
1719
1720static char replace__doc__[] =
1721"S.replace (old, new[, maxsplit]) -> string\n\
1722\n\
1723Return a copy of string S with all occurrences of substring\n\
1724old replaced by new. If the optional argument maxsplit is\n\
1725given, only the first maxsplit occurrences are replaced.";
1726
1727static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001728string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001729{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001730 const char *str = PyString_AS_STRING(self), *sub, *repl;
1731 char *new_s;
1732 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1733 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001734 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001735 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001736
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 if (!PyArg_ParseTuple(args, "OO|i:replace",
1738 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001739 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001740
1741 if (PyString_Check(subobj)) {
1742 sub = PyString_AS_STRING(subobj);
1743 sub_len = PyString_GET_SIZE(subobj);
1744 }
1745 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001746 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001747 subobj, replobj, count);
1748 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1749 return NULL;
1750
1751 if (PyString_Check(replobj)) {
1752 repl = PyString_AS_STRING(replobj);
1753 repl_len = PyString_GET_SIZE(replobj);
1754 }
1755 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001756 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001757 subobj, replobj, count);
1758 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1759 return NULL;
1760
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001761 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001762 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001763 return NULL;
1764 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001765 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001766 if (new_s == NULL) {
1767 PyErr_NoMemory();
1768 return NULL;
1769 }
1770 if (out_len == -1) {
1771 /* we're returning another reference to self */
1772 new = (PyObject*)self;
1773 Py_INCREF(new);
1774 }
1775 else {
1776 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001777 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001778 }
1779 return new;
1780}
1781
1782
1783static char startswith__doc__[] =
1784"S.startswith(prefix[, start[, end]]) -> int\n\
1785\n\
1786Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1787optional start, test S beginning at that position. With optional end, stop\n\
1788comparing S at that position.";
1789
1790static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001791string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001793 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001794 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001795 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796 int plen;
1797 int start = 0;
1798 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001799 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001800
Guido van Rossumc6821402000-05-08 14:08:05 +00001801 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1802 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001803 return NULL;
1804 if (PyString_Check(subobj)) {
1805 prefix = PyString_AS_STRING(subobj);
1806 plen = PyString_GET_SIZE(subobj);
1807 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001808 else if (PyUnicode_Check(subobj)) {
1809 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001810 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001811 subobj, start, end, -1);
1812 if (rc == -1)
1813 return NULL;
1814 else
1815 return PyInt_FromLong((long) rc);
1816 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001817 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818 return NULL;
1819
1820 /* adopt Java semantics for index out of range. it is legal for
1821 * offset to be == plen, but this only returns true if prefix is
1822 * the empty string.
1823 */
1824 if (start < 0 || start+plen > len)
1825 return PyInt_FromLong(0);
1826
1827 if (!memcmp(str+start, prefix, plen)) {
1828 /* did the match end after the specified end? */
1829 if (end < 0)
1830 return PyInt_FromLong(1);
1831 else if (end - start < plen)
1832 return PyInt_FromLong(0);
1833 else
1834 return PyInt_FromLong(1);
1835 }
1836 else return PyInt_FromLong(0);
1837}
1838
1839
1840static char endswith__doc__[] =
1841"S.endswith(suffix[, start[, end]]) -> int\n\
1842\n\
1843Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1844optional start, test S beginning at that position. With optional end, stop\n\
1845comparing S at that position.";
1846
1847static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001848string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001849{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001850 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001851 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001852 const char* suffix;
1853 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001854 int start = 0;
1855 int end = -1;
1856 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001857 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001858
Guido van Rossumc6821402000-05-08 14:08:05 +00001859 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1860 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001861 return NULL;
1862 if (PyString_Check(subobj)) {
1863 suffix = PyString_AS_STRING(subobj);
1864 slen = PyString_GET_SIZE(subobj);
1865 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001866 else if (PyUnicode_Check(subobj)) {
1867 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001868 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001869 subobj, start, end, +1);
1870 if (rc == -1)
1871 return NULL;
1872 else
1873 return PyInt_FromLong((long) rc);
1874 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001875 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001876 return NULL;
1877
Guido van Rossum4c08d552000-03-10 22:55:18 +00001878 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001879 return PyInt_FromLong(0);
1880
1881 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001882 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001883
Guido van Rossum4c08d552000-03-10 22:55:18 +00001884 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001885 return PyInt_FromLong(1);
1886 else return PyInt_FromLong(0);
1887}
1888
1889
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001890static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001891"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001892\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001893Encodes S using the codec registered for encoding. encoding defaults\n\
1894to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001895handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1896a ValueError. Other possible values are 'ignore' and 'replace'.";
1897
1898static PyObject *
1899string_encode(PyStringObject *self, PyObject *args)
1900{
1901 char *encoding = NULL;
1902 char *errors = NULL;
1903 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1904 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001905 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
1906}
1907
1908
1909static char decode__doc__[] =
1910"S.decode([encoding[,errors]]) -> object\n\
1911\n\
1912Decodes S using the codec registered for encoding. encoding defaults\n\
1913to the default encoding. errors may be given to set a different error\n\
1914handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1915a ValueError. Other possible values are 'ignore' and 'replace'.";
1916
1917static PyObject *
1918string_decode(PyStringObject *self, PyObject *args)
1919{
1920 char *encoding = NULL;
1921 char *errors = NULL;
1922 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
1923 return NULL;
1924 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001925}
1926
1927
Guido van Rossum4c08d552000-03-10 22:55:18 +00001928static char expandtabs__doc__[] =
1929"S.expandtabs([tabsize]) -> string\n\
1930\n\
1931Return a copy of S where all tab characters are expanded using spaces.\n\
1932If tabsize is not given, a tab size of 8 characters is assumed.";
1933
1934static PyObject*
1935string_expandtabs(PyStringObject *self, PyObject *args)
1936{
1937 const char *e, *p;
1938 char *q;
1939 int i, j;
1940 PyObject *u;
1941 int tabsize = 8;
1942
1943 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1944 return NULL;
1945
Thomas Wouters7e474022000-07-16 12:04:32 +00001946 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001947 i = j = 0;
1948 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1949 for (p = PyString_AS_STRING(self); p < e; p++)
1950 if (*p == '\t') {
1951 if (tabsize > 0)
1952 j += tabsize - (j % tabsize);
1953 }
1954 else {
1955 j++;
1956 if (*p == '\n' || *p == '\r') {
1957 i += j;
1958 j = 0;
1959 }
1960 }
1961
1962 /* Second pass: create output string and fill it */
1963 u = PyString_FromStringAndSize(NULL, i + j);
1964 if (!u)
1965 return NULL;
1966
1967 j = 0;
1968 q = PyString_AS_STRING(u);
1969
1970 for (p = PyString_AS_STRING(self); p < e; p++)
1971 if (*p == '\t') {
1972 if (tabsize > 0) {
1973 i = tabsize - (j % tabsize);
1974 j += i;
1975 while (i--)
1976 *q++ = ' ';
1977 }
1978 }
1979 else {
1980 j++;
1981 *q++ = *p;
1982 if (*p == '\n' || *p == '\r')
1983 j = 0;
1984 }
1985
1986 return u;
1987}
1988
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001989static
1990PyObject *pad(PyStringObject *self,
1991 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001992 int right,
1993 char fill)
1994{
1995 PyObject *u;
1996
1997 if (left < 0)
1998 left = 0;
1999 if (right < 0)
2000 right = 0;
2001
2002 if (left == 0 && right == 0) {
2003 Py_INCREF(self);
2004 return (PyObject *)self;
2005 }
2006
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002007 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002008 left + PyString_GET_SIZE(self) + right);
2009 if (u) {
2010 if (left)
2011 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002012 memcpy(PyString_AS_STRING(u) + left,
2013 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002014 PyString_GET_SIZE(self));
2015 if (right)
2016 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2017 fill, right);
2018 }
2019
2020 return u;
2021}
2022
2023static char ljust__doc__[] =
2024"S.ljust(width) -> string\n\
2025\n\
2026Return S left justified in a string of length width. Padding is\n\
2027done using spaces.";
2028
2029static PyObject *
2030string_ljust(PyStringObject *self, PyObject *args)
2031{
2032 int width;
2033 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2034 return NULL;
2035
2036 if (PyString_GET_SIZE(self) >= width) {
2037 Py_INCREF(self);
2038 return (PyObject*) self;
2039 }
2040
2041 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2042}
2043
2044
2045static char rjust__doc__[] =
2046"S.rjust(width) -> string\n\
2047\n\
2048Return S right justified in a string of length width. Padding is\n\
2049done using spaces.";
2050
2051static PyObject *
2052string_rjust(PyStringObject *self, PyObject *args)
2053{
2054 int width;
2055 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2056 return NULL;
2057
2058 if (PyString_GET_SIZE(self) >= width) {
2059 Py_INCREF(self);
2060 return (PyObject*) self;
2061 }
2062
2063 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2064}
2065
2066
2067static char center__doc__[] =
2068"S.center(width) -> string\n\
2069\n\
2070Return S centered in a string of length width. Padding is done\n\
2071using spaces.";
2072
2073static PyObject *
2074string_center(PyStringObject *self, PyObject *args)
2075{
2076 int marg, left;
2077 int width;
2078
2079 if (!PyArg_ParseTuple(args, "i:center", &width))
2080 return NULL;
2081
2082 if (PyString_GET_SIZE(self) >= width) {
2083 Py_INCREF(self);
2084 return (PyObject*) self;
2085 }
2086
2087 marg = width - PyString_GET_SIZE(self);
2088 left = marg / 2 + (marg & width & 1);
2089
2090 return pad(self, left, marg - left, ' ');
2091}
2092
#if 0
static char zfill__doc__[] =
	"S.zfill(width) -> string\n"
	"\n"
	"Pad a numeric string x with zeros on the left, to fill a field\n"
	"of the specified width. The string x is never truncated.";

/* Implements S.zfill(width); currently compiled out.  Pads with '0' on
   the left and, if the original text began with '+' or '-', moves that
   sign in front of the zeros. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int nzeros;
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	nzeros = width - PyString_GET_SIZE(self);
	padded = pad(self, nzeros, 0, '0');
	if (padded != NULL) {
		buf = PyString_AS_STRING(padded);
		/* buf[nzeros] is the first character of the original text */
		if (buf[nzeros] == '+' || buf[nzeros] == '-') {
			/* move sign to beginning of string */
			buf[0] = buf[nzeros];
			buf[nzeros] = '0';
		}
	}
	return padded;
}
#endif
2132
2133static char isspace__doc__[] =
2134"S.isspace() -> int\n\
2135\n\
2136Return 1 if there are only whitespace characters in S,\n\
21370 otherwise.";
2138
2139static PyObject*
2140string_isspace(PyStringObject *self, PyObject *args)
2141{
Fred Drakeba096332000-07-09 07:04:36 +00002142 register const unsigned char *p
2143 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002144 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002145
2146 if (!PyArg_NoArgs(args))
2147 return NULL;
2148
2149 /* Shortcut for single character strings */
2150 if (PyString_GET_SIZE(self) == 1 &&
2151 isspace(*p))
2152 return PyInt_FromLong(1);
2153
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002154 /* Special case for empty strings */
2155 if (PyString_GET_SIZE(self) == 0)
2156 return PyInt_FromLong(0);
2157
Guido van Rossum4c08d552000-03-10 22:55:18 +00002158 e = p + PyString_GET_SIZE(self);
2159 for (; p < e; p++) {
2160 if (!isspace(*p))
2161 return PyInt_FromLong(0);
2162 }
2163 return PyInt_FromLong(1);
2164}
2165
2166
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002167static char isalpha__doc__[] =
2168"S.isalpha() -> int\n\
2169\n\
2170Return 1 if all characters in S are alphabetic\n\
2171and there is at least one character in S, 0 otherwise.";
2172
2173static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002174string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002175{
Fred Drakeba096332000-07-09 07:04:36 +00002176 register const unsigned char *p
2177 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002178 register const unsigned char *e;
2179
2180 if (!PyArg_NoArgs(args))
2181 return NULL;
2182
2183 /* Shortcut for single character strings */
2184 if (PyString_GET_SIZE(self) == 1 &&
2185 isalpha(*p))
2186 return PyInt_FromLong(1);
2187
2188 /* Special case for empty strings */
2189 if (PyString_GET_SIZE(self) == 0)
2190 return PyInt_FromLong(0);
2191
2192 e = p + PyString_GET_SIZE(self);
2193 for (; p < e; p++) {
2194 if (!isalpha(*p))
2195 return PyInt_FromLong(0);
2196 }
2197 return PyInt_FromLong(1);
2198}
2199
2200
2201static char isalnum__doc__[] =
2202"S.isalnum() -> int\n\
2203\n\
2204Return 1 if all characters in S are alphanumeric\n\
2205and there is at least one character in S, 0 otherwise.";
2206
2207static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002208string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002209{
Fred Drakeba096332000-07-09 07:04:36 +00002210 register const unsigned char *p
2211 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002212 register const unsigned char *e;
2213
2214 if (!PyArg_NoArgs(args))
2215 return NULL;
2216
2217 /* Shortcut for single character strings */
2218 if (PyString_GET_SIZE(self) == 1 &&
2219 isalnum(*p))
2220 return PyInt_FromLong(1);
2221
2222 /* Special case for empty strings */
2223 if (PyString_GET_SIZE(self) == 0)
2224 return PyInt_FromLong(0);
2225
2226 e = p + PyString_GET_SIZE(self);
2227 for (; p < e; p++) {
2228 if (!isalnum(*p))
2229 return PyInt_FromLong(0);
2230 }
2231 return PyInt_FromLong(1);
2232}
2233
2234
Guido van Rossum4c08d552000-03-10 22:55:18 +00002235static char isdigit__doc__[] =
2236"S.isdigit() -> int\n\
2237\n\
2238Return 1 if there are only digit characters in S,\n\
22390 otherwise.";
2240
2241static PyObject*
2242string_isdigit(PyStringObject *self, PyObject *args)
2243{
Fred Drakeba096332000-07-09 07:04:36 +00002244 register const unsigned char *p
2245 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002246 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002247
2248 if (!PyArg_NoArgs(args))
2249 return NULL;
2250
2251 /* Shortcut for single character strings */
2252 if (PyString_GET_SIZE(self) == 1 &&
2253 isdigit(*p))
2254 return PyInt_FromLong(1);
2255
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002256 /* Special case for empty strings */
2257 if (PyString_GET_SIZE(self) == 0)
2258 return PyInt_FromLong(0);
2259
Guido van Rossum4c08d552000-03-10 22:55:18 +00002260 e = p + PyString_GET_SIZE(self);
2261 for (; p < e; p++) {
2262 if (!isdigit(*p))
2263 return PyInt_FromLong(0);
2264 }
2265 return PyInt_FromLong(1);
2266}
2267
2268
2269static char islower__doc__[] =
2270"S.islower() -> int\n\
2271\n\
2272Return 1 if all cased characters in S are lowercase and there is\n\
2273at least one cased character in S, 0 otherwise.";
2274
2275static PyObject*
2276string_islower(PyStringObject *self, PyObject *args)
2277{
Fred Drakeba096332000-07-09 07:04:36 +00002278 register const unsigned char *p
2279 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002280 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 int cased;
2282
2283 if (!PyArg_NoArgs(args))
2284 return NULL;
2285
2286 /* Shortcut for single character strings */
2287 if (PyString_GET_SIZE(self) == 1)
2288 return PyInt_FromLong(islower(*p) != 0);
2289
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002290 /* Special case for empty strings */
2291 if (PyString_GET_SIZE(self) == 0)
2292 return PyInt_FromLong(0);
2293
Guido van Rossum4c08d552000-03-10 22:55:18 +00002294 e = p + PyString_GET_SIZE(self);
2295 cased = 0;
2296 for (; p < e; p++) {
2297 if (isupper(*p))
2298 return PyInt_FromLong(0);
2299 else if (!cased && islower(*p))
2300 cased = 1;
2301 }
2302 return PyInt_FromLong(cased);
2303}
2304
2305
2306static char isupper__doc__[] =
2307"S.isupper() -> int\n\
2308\n\
2309Return 1 if all cased characters in S are uppercase and there is\n\
2310at least one cased character in S, 0 otherwise.";
2311
2312static PyObject*
2313string_isupper(PyStringObject *self, PyObject *args)
2314{
Fred Drakeba096332000-07-09 07:04:36 +00002315 register const unsigned char *p
2316 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002317 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002318 int cased;
2319
2320 if (!PyArg_NoArgs(args))
2321 return NULL;
2322
2323 /* Shortcut for single character strings */
2324 if (PyString_GET_SIZE(self) == 1)
2325 return PyInt_FromLong(isupper(*p) != 0);
2326
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002327 /* Special case for empty strings */
2328 if (PyString_GET_SIZE(self) == 0)
2329 return PyInt_FromLong(0);
2330
Guido van Rossum4c08d552000-03-10 22:55:18 +00002331 e = p + PyString_GET_SIZE(self);
2332 cased = 0;
2333 for (; p < e; p++) {
2334 if (islower(*p))
2335 return PyInt_FromLong(0);
2336 else if (!cased && isupper(*p))
2337 cased = 1;
2338 }
2339 return PyInt_FromLong(cased);
2340}
2341
2342
2343static char istitle__doc__[] =
2344"S.istitle() -> int\n\
2345\n\
2346Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2347may only follow uncased characters and lowercase characters only cased\n\
2348ones. Return 0 otherwise.";
2349
2350static PyObject*
2351string_istitle(PyStringObject *self, PyObject *args)
2352{
Fred Drakeba096332000-07-09 07:04:36 +00002353 register const unsigned char *p
2354 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002355 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 int cased, previous_is_cased;
2357
2358 if (!PyArg_NoArgs(args))
2359 return NULL;
2360
2361 /* Shortcut for single character strings */
2362 if (PyString_GET_SIZE(self) == 1)
2363 return PyInt_FromLong(isupper(*p) != 0);
2364
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002365 /* Special case for empty strings */
2366 if (PyString_GET_SIZE(self) == 0)
2367 return PyInt_FromLong(0);
2368
Guido van Rossum4c08d552000-03-10 22:55:18 +00002369 e = p + PyString_GET_SIZE(self);
2370 cased = 0;
2371 previous_is_cased = 0;
2372 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002373 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374
2375 if (isupper(ch)) {
2376 if (previous_is_cased)
2377 return PyInt_FromLong(0);
2378 previous_is_cased = 1;
2379 cased = 1;
2380 }
2381 else if (islower(ch)) {
2382 if (!previous_is_cased)
2383 return PyInt_FromLong(0);
2384 previous_is_cased = 1;
2385 cased = 1;
2386 }
2387 else
2388 previous_is_cased = 0;
2389 }
2390 return PyInt_FromLong(cased);
2391}
2392
2393
2394static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002395"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002396\n\
2397Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002398Line breaks are not included in the resulting list unless keepends\n\
2399is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002400
2401#define SPLIT_APPEND(data, left, right) \
2402 str = PyString_FromStringAndSize(data + left, right - left); \
2403 if (!str) \
2404 goto onError; \
2405 if (PyList_Append(list, str)) { \
2406 Py_DECREF(str); \
2407 goto onError; \
2408 } \
2409 else \
2410 Py_DECREF(str);
2411
2412static PyObject*
2413string_splitlines(PyStringObject *self, PyObject *args)
2414{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002415 register int i;
2416 register int j;
2417 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002418 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002419 PyObject *list;
2420 PyObject *str;
2421 char *data;
2422
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002423 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002424 return NULL;
2425
2426 data = PyString_AS_STRING(self);
2427 len = PyString_GET_SIZE(self);
2428
Guido van Rossum4c08d552000-03-10 22:55:18 +00002429 list = PyList_New(0);
2430 if (!list)
2431 goto onError;
2432
2433 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002434 int eol;
2435
Guido van Rossum4c08d552000-03-10 22:55:18 +00002436 /* Find a line and append it */
2437 while (i < len && data[i] != '\n' && data[i] != '\r')
2438 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002439
2440 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002441 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 if (i < len) {
2443 if (data[i] == '\r' && i + 1 < len &&
2444 data[i+1] == '\n')
2445 i += 2;
2446 else
2447 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002448 if (keepends)
2449 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002450 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002451 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002452 j = i;
2453 }
2454 if (j < len) {
2455 SPLIT_APPEND(data, j, len);
2456 }
2457
2458 return list;
2459
2460 onError:
2461 Py_DECREF(list);
2462 return NULL;
2463}
2464
2465#undef SPLIT_APPEND
2466
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002467
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002468static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002469string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002470 /* Counterparts of the obsolete stropmodule functions; except
2471 string.maketrans(). */
2472 {"join", (PyCFunction)string_join, 1, join__doc__},
2473 {"split", (PyCFunction)string_split, 1, split__doc__},
2474 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2475 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2476 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2477 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2478 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2479 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2480 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002481 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2482 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002483 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2484 {"count", (PyCFunction)string_count, 1, count__doc__},
2485 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2486 {"find", (PyCFunction)string_find, 1, find__doc__},
2487 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002488 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002489 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2490 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2491 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2492 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002493 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2494 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2495 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002496 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2497 {"title", (PyCFunction)string_title, 1, title__doc__},
2498 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2499 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2500 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002501 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002502 {"decode", (PyCFunction)string_decode, 1, decode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002503 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2504 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2505#if 0
2506 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2507#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002508 {NULL, NULL} /* sentinel */
2509};
2510
2511static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002512string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002513{
2514 return Py_FindMethod(string_methods, (PyObject*)s, name);
2515}
2516
2517
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002518PyTypeObject PyString_Type = {
2519 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002520 0,
2521 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002522 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002523 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002524 (destructor)string_dealloc, /*tp_dealloc*/
2525 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002527 0, /*tp_setattr*/
Martin v. Löwiscd353062001-05-24 16:56:35 +00002528 0, /*tp_compare*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002529 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002530 0, /*tp_as_number*/
2531 &string_as_sequence, /*tp_as_sequence*/
2532 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002533 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002534 0, /*tp_call*/
Guido van Rossum189f1df2001-05-01 16:51:53 +00002535 (reprfunc)string_str, /*tp_str*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002536 0, /*tp_getattro*/
2537 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002538 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002539 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002540 0, /*tp_doc*/
Martin v. Löwiscd353062001-05-24 16:56:35 +00002541 0, /*tp_traverse*/
2542 0, /*tp_clear*/
2543 (richcmpfunc)string_richcompare, /*tp_richcompare*/
2544 0, /*tp_weaklistoffset*/
2545 0, /*tp_iter*/
2546 0, /*tp_iternext*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002547};
2548
2549void
Fred Drakeba096332000-07-09 07:04:36 +00002550PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002551{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002552 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002553 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002554 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002555 if (w == NULL || !PyString_Check(*pv)) {
2556 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002557 *pv = NULL;
2558 return;
2559 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002560 v = string_concat((PyStringObject *) *pv, w);
2561 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002562 *pv = v;
2563}
2564
Guido van Rossum013142a1994-08-30 08:19:36 +00002565void
Fred Drakeba096332000-07-09 07:04:36 +00002566PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002567{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002568 PyString_Concat(pv, w);
2569 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002570}
2571
2572
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002573/* The following function breaks the notion that strings are immutable:
2574 it changes the size of a string. We get away with this only if there
2575 is only one module referencing the object. You can also think of it
2576 as creating a new string object and destroying the old one, only
2577 more efficiently. In any case, don't use this if the string may
2578 already be known to some other part of the code... */
2579
2580int
Fred Drakeba096332000-07-09 07:04:36 +00002581_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002582{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002583 register PyObject *v;
2584 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002585 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002586 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002587 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002588 Py_DECREF(v);
2589 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002590 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002591 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002592 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002593#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002594 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002595#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002596 _Py_ForgetReference(v);
2597 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002598 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002599 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002600 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002601 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002602 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002603 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002604 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002605 _Py_NewReference(*pv);
2606 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002607 sv->ob_size = newsize;
2608 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002609 return 0;
2610}
Guido van Rossume5372401993-03-16 12:15:04 +00002611
2612/* Helpers for formatstring */
2613
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002614static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002615getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002616{
2617 int argidx = *p_argidx;
2618 if (argidx < arglen) {
2619 (*p_argidx)++;
2620 if (arglen < 0)
2621 return args;
2622 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002623 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002624 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002625 PyErr_SetString(PyExc_TypeError,
2626 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002627 return NULL;
2628}
2629
/* Bit flags recording which flag characters were seen in a format
 * specifier; they are OR-ed together into a single `flags` int.
 */
#define F_LJUST (1 << 0)    /* '-' */
#define F_SIGN  (1 << 1)    /* '+' */
#define F_BLANK (1 << 2)    /* ' ' */
#define F_ALT   (1 << 3)    /* '#' */
#define F_ZERO  (1 << 4)    /* '0' */
2642
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002643static int
Fred Drakeba096332000-07-09 07:04:36 +00002644formatfloat(char *buf, size_t buflen, int flags,
2645 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002646{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002647 /* fmt = '%#.' + `prec` + `type`
2648 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002649 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002650 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002651 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002652 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002653 if (prec < 0)
2654 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002655 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2656 type = 'g';
2657 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002658 /* worst case length calc to ensure no buffer overrun:
2659 fmt = %#.<prec>g
2660 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002661 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002662 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2663 If prec=0 the effective precision is 1 (the leading digit is
2664 always given), therefore increase by one to 10+prec. */
2665 if (buflen <= (size_t)10 + (size_t)prec) {
2666 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002667 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002668 return -1;
2669 }
Guido van Rossume5372401993-03-16 12:15:04 +00002670 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002671 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002672}
2673
Tim Peters38fd5b62000-09-21 05:43:11 +00002674/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2675 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2676 * Python's regular ints.
2677 * Return value: a new PyString*, or NULL if error.
2678 * . *pbuf is set to point into it,
2679 * *plen set to the # of chars following that.
2680 * Caller must decref it when done using pbuf.
2681 * The string starting at *pbuf is of the form
2682 * "-"? ("0x" | "0X")? digit+
2683 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002684 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002685 * There will be at least prec digits, zero-filled on the left if
2686 * necessary to get that many.
2687 * val object to be converted
2688 * flags bitmask of format flags; only F_ALT is looked at
2689 * prec minimum number of digits; 0-fill on left if needed
2690 * type a character in [duoxX]; u acts the same as d
2691 *
2692 * CAUTION: o, x and X conversions on regular ints can never
2693 * produce a '-' sign, but can for Python's unbounded ints.
2694 */
2695PyObject*
2696_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2697 char **pbuf, int *plen)
2698{
2699 PyObject *result = NULL;
2700 char *buf;
2701 int i;
2702 int sign; /* 1 if '-', else 0 */
2703 int len; /* number of characters */
2704 int numdigits; /* len == numnondigits + numdigits */
2705 int numnondigits = 0;
2706
2707 switch (type) {
2708 case 'd':
2709 case 'u':
2710 result = val->ob_type->tp_str(val);
2711 break;
2712 case 'o':
2713 result = val->ob_type->tp_as_number->nb_oct(val);
2714 break;
2715 case 'x':
2716 case 'X':
2717 numnondigits = 2;
2718 result = val->ob_type->tp_as_number->nb_hex(val);
2719 break;
2720 default:
2721 assert(!"'type' not in [duoxX]");
2722 }
2723 if (!result)
2724 return NULL;
2725
2726 /* To modify the string in-place, there can only be one reference. */
2727 if (result->ob_refcnt != 1) {
2728 PyErr_BadInternalCall();
2729 return NULL;
2730 }
2731 buf = PyString_AsString(result);
2732 len = PyString_Size(result);
2733 if (buf[len-1] == 'L') {
2734 --len;
2735 buf[len] = '\0';
2736 }
2737 sign = buf[0] == '-';
2738 numnondigits += sign;
2739 numdigits = len - numnondigits;
2740 assert(numdigits > 0);
2741
Tim Petersfff53252001-04-12 18:38:48 +00002742 /* Get rid of base marker unless F_ALT */
2743 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002744 /* Need to skip 0x, 0X or 0. */
2745 int skipped = 0;
2746 switch (type) {
2747 case 'o':
2748 assert(buf[sign] == '0');
2749 /* If 0 is only digit, leave it alone. */
2750 if (numdigits > 1) {
2751 skipped = 1;
2752 --numdigits;
2753 }
2754 break;
2755 case 'x':
2756 case 'X':
2757 assert(buf[sign] == '0');
2758 assert(buf[sign + 1] == 'x');
2759 skipped = 2;
2760 numnondigits -= 2;
2761 break;
2762 }
2763 if (skipped) {
2764 buf += skipped;
2765 len -= skipped;
2766 if (sign)
2767 buf[0] = '-';
2768 }
2769 assert(len == numnondigits + numdigits);
2770 assert(numdigits > 0);
2771 }
2772
2773 /* Fill with leading zeroes to meet minimum width. */
2774 if (prec > numdigits) {
2775 PyObject *r1 = PyString_FromStringAndSize(NULL,
2776 numnondigits + prec);
2777 char *b1;
2778 if (!r1) {
2779 Py_DECREF(result);
2780 return NULL;
2781 }
2782 b1 = PyString_AS_STRING(r1);
2783 for (i = 0; i < numnondigits; ++i)
2784 *b1++ = *buf++;
2785 for (i = 0; i < prec - numdigits; i++)
2786 *b1++ = '0';
2787 for (i = 0; i < numdigits; i++)
2788 *b1++ = *buf++;
2789 *b1 = '\0';
2790 Py_DECREF(result);
2791 result = r1;
2792 buf = PyString_AS_STRING(result);
2793 len = numnondigits + prec;
2794 }
2795
2796 /* Fix up case for hex conversions. */
2797 switch (type) {
2798 case 'x':
2799 /* Need to convert all upper case letters to lower case. */
2800 for (i = 0; i < len; i++)
2801 if (buf[i] >= 'A' && buf[i] <= 'F')
2802 buf[i] += 'a'-'A';
2803 break;
2804 case 'X':
2805 /* Need to convert 0x to 0X (and -0x to -0X). */
2806 if (buf[sign + 1] == 'x')
2807 buf[sign + 1] = 'X';
2808 break;
2809 }
2810 *pbuf = buf;
2811 *plen = len;
2812 return result;
2813}
2814
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002815static int
Fred Drakeba096332000-07-09 07:04:36 +00002816formatint(char *buf, size_t buflen, int flags,
2817 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002818{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002819 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002820 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2821 + 1 + 1 = 24 */
2822 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002823 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002824 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002825 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002826 if (prec < 0)
2827 prec = 1;
2828 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002829 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002830 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002831 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002832 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002833 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002834 return -1;
2835 }
Guido van Rossume5372401993-03-16 12:15:04 +00002836 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002837 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2838 * but we want it (for consistency with other %#x conversions, and
2839 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002840 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2841 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2842 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002843 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002844 if (x == 0 &&
2845 (flags & F_ALT) &&
2846 (type == 'x' || type == 'X') &&
2847 buf[1] != (char)type) /* this last always true under std C */
2848 {
Tim Petersfff53252001-04-12 18:38:48 +00002849 memmove(buf+2, buf, strlen(buf) + 1);
2850 buf[0] = '0';
2851 buf[1] = (char)type;
2852 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002853 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002854}
2855
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002856static int
Fred Drakeba096332000-07-09 07:04:36 +00002857formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002858{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002859 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002860 if (PyString_Check(v)) {
2861 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002862 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002863 }
2864 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002865 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002866 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002867 }
2868 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002869 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002870}
2871
Guido van Rossum013142a1994-08-30 08:19:36 +00002872
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002882
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002883PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002884PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002885{
2886 char *fmt, *res;
2887 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002888 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002889 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002890 PyObject *dict = NULL;
2891 if (format == NULL || !PyString_Check(format) || args == NULL) {
2892 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002893 return NULL;
2894 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002895 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002896 fmt = PyString_AsString(format);
2897 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002898 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002899 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002900 if (result == NULL)
2901 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002902 res = PyString_AsString(result);
2903 if (PyTuple_Check(args)) {
2904 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002905 argidx = 0;
2906 }
2907 else {
2908 arglen = -1;
2909 argidx = -2;
2910 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002911 if (args->ob_type->tp_as_mapping)
2912 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002913 while (--fmtcnt >= 0) {
2914 if (*fmt != '%') {
2915 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002916 rescnt = fmtcnt + 100;
2917 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002918 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002919 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002920 res = PyString_AsString(result)
2921 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002922 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002923 }
2924 *res++ = *fmt++;
2925 }
2926 else {
2927 /* Got a format specifier */
2928 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002929 int width = -1;
2930 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00002931 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002932 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002933 PyObject *v = NULL;
2934 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002935 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002936 int sign;
2937 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002938 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002939 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002940 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002941
Guido van Rossumda9c2711996-12-05 21:58:58 +00002942 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002943 if (*fmt == '(') {
2944 char *keystart;
2945 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002946 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002947 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002948
2949 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002950 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002951 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002952 goto error;
2953 }
2954 ++fmt;
2955 --fmtcnt;
2956 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002957 /* Skip over balanced parentheses */
2958 while (pcount > 0 && --fmtcnt >= 0) {
2959 if (*fmt == ')')
2960 --pcount;
2961 else if (*fmt == '(')
2962 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002963 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002964 }
2965 keylen = fmt - keystart - 1;
2966 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002967 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002968 "incomplete format key");
2969 goto error;
2970 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002971 key = PyString_FromStringAndSize(keystart,
2972 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002973 if (key == NULL)
2974 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002975 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002976 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002977 args_owned = 0;
2978 }
2979 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002980 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002981 if (args == NULL) {
2982 goto error;
2983 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002984 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002985 arglen = -1;
2986 argidx = -2;
2987 }
Guido van Rossume5372401993-03-16 12:15:04 +00002988 while (--fmtcnt >= 0) {
2989 switch (c = *fmt++) {
2990 case '-': flags |= F_LJUST; continue;
2991 case '+': flags |= F_SIGN; continue;
2992 case ' ': flags |= F_BLANK; continue;
2993 case '#': flags |= F_ALT; continue;
2994 case '0': flags |= F_ZERO; continue;
2995 }
2996 break;
2997 }
2998 if (c == '*') {
2999 v = getnextarg(args, arglen, &argidx);
3000 if (v == NULL)
3001 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003002 if (!PyInt_Check(v)) {
3003 PyErr_SetString(PyExc_TypeError,
3004 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003005 goto error;
3006 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003007 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003008 if (width < 0) {
3009 flags |= F_LJUST;
3010 width = -width;
3011 }
Guido van Rossume5372401993-03-16 12:15:04 +00003012 if (--fmtcnt >= 0)
3013 c = *fmt++;
3014 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003015 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003016 width = c - '0';
3017 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003018 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003019 if (!isdigit(c))
3020 break;
3021 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003022 PyErr_SetString(
3023 PyExc_ValueError,
3024 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003025 goto error;
3026 }
3027 width = width*10 + (c - '0');
3028 }
3029 }
3030 if (c == '.') {
3031 prec = 0;
3032 if (--fmtcnt >= 0)
3033 c = *fmt++;
3034 if (c == '*') {
3035 v = getnextarg(args, arglen, &argidx);
3036 if (v == NULL)
3037 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003038 if (!PyInt_Check(v)) {
3039 PyErr_SetString(
3040 PyExc_TypeError,
3041 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003042 goto error;
3043 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003044 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003045 if (prec < 0)
3046 prec = 0;
3047 if (--fmtcnt >= 0)
3048 c = *fmt++;
3049 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003050 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003051 prec = c - '0';
3052 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003053 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003054 if (!isdigit(c))
3055 break;
3056 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003057 PyErr_SetString(
3058 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003059 "prec too big");
3060 goto error;
3061 }
3062 prec = prec*10 + (c - '0');
3063 }
3064 }
3065 } /* prec */
3066 if (fmtcnt >= 0) {
3067 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003068 if (--fmtcnt >= 0)
3069 c = *fmt++;
3070 }
3071 }
3072 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003073 PyErr_SetString(PyExc_ValueError,
3074 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003075 goto error;
3076 }
3077 if (c != '%') {
3078 v = getnextarg(args, arglen, &argidx);
3079 if (v == NULL)
3080 goto error;
3081 }
3082 sign = 0;
3083 fill = ' ';
3084 switch (c) {
3085 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003086 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003087 len = 1;
3088 break;
3089 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003090 case 'r':
3091 if (PyUnicode_Check(v)) {
3092 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003093 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003094 goto unicode;
3095 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003096 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003097 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003098 else
3099 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003100 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003101 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003102 if (!PyString_Check(temp)) {
3103 PyErr_SetString(PyExc_TypeError,
3104 "%s argument has non-string str()");
3105 goto error;
3106 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003107 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003108 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003109 if (prec >= 0 && len > prec)
3110 len = prec;
3111 break;
3112 case 'i':
3113 case 'd':
3114 case 'u':
3115 case 'o':
3116 case 'x':
3117 case 'X':
3118 if (c == 'i')
3119 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003120 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003121 temp = _PyString_FormatLong(v, flags,
3122 prec, c, &pbuf, &len);
3123 if (!temp)
3124 goto error;
3125 /* unbounded ints can always produce
3126 a sign character! */
3127 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003128 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003129 else {
3130 pbuf = formatbuf;
3131 len = formatint(pbuf, sizeof(formatbuf),
3132 flags, prec, c, v);
3133 if (len < 0)
3134 goto error;
3135 /* only d conversion is signed */
3136 sign = c == 'd';
3137 }
3138 if (flags & F_ZERO)
3139 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003140 break;
3141 case 'e':
3142 case 'E':
3143 case 'f':
3144 case 'g':
3145 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003146 pbuf = formatbuf;
3147 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003148 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003149 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003150 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003151 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003152 fill = '0';
3153 break;
3154 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003155 pbuf = formatbuf;
3156 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003157 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003158 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003159 break;
3160 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003161 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003162 "unsupported format character '%c' (0x%x) "
3163 "at index %i",
3164 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003165 goto error;
3166 }
3167 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003168 if (*pbuf == '-' || *pbuf == '+') {
3169 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003170 len--;
3171 }
3172 else if (flags & F_SIGN)
3173 sign = '+';
3174 else if (flags & F_BLANK)
3175 sign = ' ';
3176 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003177 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003178 }
3179 if (width < len)
3180 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003181 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003182 reslen -= rescnt;
3183 rescnt = width + fmtcnt + 100;
3184 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003185 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003186 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003187 res = PyString_AsString(result)
3188 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003189 }
3190 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003191 if (fill != ' ')
3192 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003193 rescnt--;
3194 if (width > len)
3195 width--;
3196 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003197 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3198 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003199 assert(pbuf[1] == c);
3200 if (fill != ' ') {
3201 *res++ = *pbuf++;
3202 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003203 }
Tim Petersfff53252001-04-12 18:38:48 +00003204 rescnt -= 2;
3205 width -= 2;
3206 if (width < 0)
3207 width = 0;
3208 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003209 }
3210 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003211 do {
3212 --rescnt;
3213 *res++ = fill;
3214 } while (--width > len);
3215 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003216 if (fill == ' ') {
3217 if (sign)
3218 *res++ = sign;
3219 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003220 (c == 'x' || c == 'X')) {
3221 assert(pbuf[0] == '0');
3222 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003223 *res++ = *pbuf++;
3224 *res++ = *pbuf++;
3225 }
3226 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003227 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003228 res += len;
3229 rescnt -= len;
3230 while (--width >= len) {
3231 --rescnt;
3232 *res++ = ' ';
3233 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003234 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003235 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003236 "not all arguments converted");
3237 goto error;
3238 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003239 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003240 } /* '%' */
3241 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003242 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003243 PyErr_SetString(PyExc_TypeError,
3244 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003245 goto error;
3246 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003247 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003248 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003249 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003250 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003251 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003252
3253 unicode:
3254 if (args_owned) {
3255 Py_DECREF(args);
3256 args_owned = 0;
3257 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003258 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003259 if (PyTuple_Check(orig_args) && argidx > 0) {
3260 PyObject *v;
3261 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3262 v = PyTuple_New(n);
3263 if (v == NULL)
3264 goto error;
3265 while (--n >= 0) {
3266 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3267 Py_INCREF(w);
3268 PyTuple_SET_ITEM(v, n, w);
3269 }
3270 args = v;
3271 } else {
3272 Py_INCREF(orig_args);
3273 args = orig_args;
3274 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003275 args_owned = 1;
3276 /* Take what we have of the result and let the Unicode formatting
3277 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003278 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003279 if (_PyString_Resize(&result, rescnt))
3280 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003281 fmtcnt = PyString_GET_SIZE(format) - \
3282 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003283 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3284 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003285 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003286 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003287 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003288 if (v == NULL)
3289 goto error;
3290 /* Paste what we have (result) to what the Unicode formatting
3291 function returned (v) and return the result (or error) */
3292 w = PyUnicode_Concat(result, v);
3293 Py_DECREF(result);
3294 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003295 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003296 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003297
Guido van Rossume5372401993-03-16 12:15:04 +00003298 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003299 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003300 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003301 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003302 }
Guido van Rossume5372401993-03-16 12:15:04 +00003303 return NULL;
3304}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003305
3306
3307#ifdef INTERN_STRINGS
3308
/* Dictionary of interned strings.  PyString_InternInPlace() creates it on
 * first use and stores each interned string as both key and value.
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
static PyObject *interned;
3322
3323void
Fred Drakeba096332000-07-09 07:04:36 +00003324PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003325{
3326 register PyStringObject *s = (PyStringObject *)(*p);
3327 PyObject *t;
3328 if (s == NULL || !PyString_Check(s))
3329 Py_FatalError("PyString_InternInPlace: strings only please!");
3330 if ((t = s->ob_sinterned) != NULL) {
3331 if (t == (PyObject *)s)
3332 return;
3333 Py_INCREF(t);
3334 *p = t;
3335 Py_DECREF(s);
3336 return;
3337 }
3338 if (interned == NULL) {
3339 interned = PyDict_New();
3340 if (interned == NULL)
3341 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003342 }
3343 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3344 Py_INCREF(t);
3345 *p = s->ob_sinterned = t;
3346 Py_DECREF(s);
3347 return;
3348 }
3349 t = (PyObject *)s;
3350 if (PyDict_SetItem(interned, t, t) == 0) {
3351 s->ob_sinterned = t;
3352 return;
3353 }
3354 PyErr_Clear();
3355}
3356
3357
3358PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003359PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003360{
3361 PyObject *s = PyString_FromString(cp);
3362 if (s == NULL)
3363 return NULL;
3364 PyString_InternInPlace(&s);
3365 return s;
3366}
3367
3368#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003369
3370void
Fred Drakeba096332000-07-09 07:04:36 +00003371PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003372{
3373 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003374 for (i = 0; i < UCHAR_MAX + 1; i++) {
3375 Py_XDECREF(characters[i]);
3376 characters[i] = NULL;
3377 }
3378#ifndef DONT_SHARE_SHORT_STRINGS
3379 Py_XDECREF(nullstring);
3380 nullstring = NULL;
3381#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003382#ifdef INTERN_STRINGS
3383 if (interned) {
3384 int pos, changed;
3385 PyObject *key, *value;
3386 do {
3387 changed = 0;
3388 pos = 0;
3389 while (PyDict_Next(interned, &pos, &key, &value)) {
3390 if (key->ob_refcnt == 2 && key == value) {
3391 PyDict_DelItem(interned, key);
3392 changed = 1;
3393 }
3394 }
3395 } while (changed);
3396 }
3397#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003398}
Barry Warsawa903ad982001-02-23 16:40:48 +00003399
3400#ifdef INTERN_STRINGS
3401void _Py_ReleaseInternedStrings(void)
3402{
3403 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003404 fprintf(stderr, "releasing interned strings\n");
3405 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003406 Py_DECREF(interned);
3407 interned = NULL;
3408 }
3409}
3410#endif /* INTERN_STRINGS */