blob: 9a88d2facc83fb548bae094a1fe3a35afa5509d1 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be followed carefully.  It is only allowed to change
   the contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000150PyObject *PyString_Decode(const char *s,
151 int size,
152 const char *encoding,
153 const char *errors)
154{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000155 PyObject *v, *str;
156
157 str = PyString_FromStringAndSize(s, size);
158 if (str == NULL)
159 return NULL;
160 v = PyString_AsDecodedString(str, encoding, errors);
161 Py_DECREF(str);
162 return v;
163}
164
165PyObject *PyString_AsDecodedObject(PyObject *str,
166 const char *encoding,
167 const char *errors)
168{
169 PyObject *v;
170
171 if (!PyString_Check(str)) {
172 PyErr_BadArgument();
173 goto onError;
174 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000175
176 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000177 encoding = PyUnicode_GetDefaultEncoding();
178
179 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000180 v = PyCodec_Decode(str, encoding, errors);
181 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000182 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000183
184 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000185
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000186 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000187 return NULL;
188}
189
190PyObject *PyString_AsDecodedString(PyObject *str,
191 const char *encoding,
192 const char *errors)
193{
194 PyObject *v;
195
196 v = PyString_AsDecodedObject(str, encoding, errors);
197 if (v == NULL)
198 goto onError;
199
200 /* Convert Unicode to a string using the default encoding */
201 if (PyUnicode_Check(v)) {
202 PyObject *temp = v;
203 v = PyUnicode_AsEncodedString(v, NULL, NULL);
204 Py_DECREF(temp);
205 if (v == NULL)
206 goto onError;
207 }
208 if (!PyString_Check(v)) {
209 PyErr_Format(PyExc_TypeError,
210 "decoder did not return a string object (type=%.400s)",
211 v->ob_type->tp_name);
212 Py_DECREF(v);
213 goto onError;
214 }
215
216 return v;
217
218 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000219 return NULL;
220}
221
222PyObject *PyString_Encode(const char *s,
223 int size,
224 const char *encoding,
225 const char *errors)
226{
227 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000228
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000229 str = PyString_FromStringAndSize(s, size);
230 if (str == NULL)
231 return NULL;
232 v = PyString_AsEncodedString(str, encoding, errors);
233 Py_DECREF(str);
234 return v;
235}
236
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000237PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000238 const char *encoding,
239 const char *errors)
240{
241 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000242
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000243 if (!PyString_Check(str)) {
244 PyErr_BadArgument();
245 goto onError;
246 }
247
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000248 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000249 encoding = PyUnicode_GetDefaultEncoding();
250
251 /* Encode via the codec registry */
252 v = PyCodec_Encode(str, encoding, errors);
253 if (v == NULL)
254 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000255
256 return v;
257
258 onError:
259 return NULL;
260}
261
262PyObject *PyString_AsEncodedString(PyObject *str,
263 const char *encoding,
264 const char *errors)
265{
266 PyObject *v;
267
Marc-André Lemburg8c2133d2001-06-12 13:14:10 +0000268 v = PyString_AsEncodedObject(str, encoding, errors);
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000269 if (v == NULL)
270 goto onError;
271
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000272 /* Convert Unicode to a string using the default encoding */
273 if (PyUnicode_Check(v)) {
274 PyObject *temp = v;
275 v = PyUnicode_AsEncodedString(v, NULL, NULL);
276 Py_DECREF(temp);
277 if (v == NULL)
278 goto onError;
279 }
280 if (!PyString_Check(v)) {
281 PyErr_Format(PyExc_TypeError,
282 "encoder did not return a string object (type=%.400s)",
283 v->ob_type->tp_name);
284 Py_DECREF(v);
285 goto onError;
286 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000287
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000288 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000289
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000290 onError:
291 return NULL;
292}
293
Guido van Rossum234f9421993-06-17 12:35:49 +0000294static void
Fred Drakeba096332000-07-09 07:04:36 +0000295string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000296{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000297 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000298}
299
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000300static int
301string_getsize(register PyObject *op)
302{
303 char *s;
304 int len;
305 if (PyString_AsStringAndSize(op, &s, &len))
306 return -1;
307 return len;
308}
309
310static /*const*/ char *
311string_getbuffer(register PyObject *op)
312{
313 char *s;
314 int len;
315 if (PyString_AsStringAndSize(op, &s, &len))
316 return NULL;
317 return s;
318}
319
Guido van Rossumd7047b31995-01-02 19:07:15 +0000320int
Fred Drakeba096332000-07-09 07:04:36 +0000321PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000323 if (!PyString_Check(op))
324 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326}
327
328/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000329PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000331 if (!PyString_Check(op))
332 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000333 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334}
335
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000336/* Internal API needed by PyString_AsStringAndSize(): */
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000337extern
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000338PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
339 const char *errors);
340
341int
342PyString_AsStringAndSize(register PyObject *obj,
343 register char **s,
344 register int *len)
345{
346 if (s == NULL) {
347 PyErr_BadInternalCall();
348 return -1;
349 }
350
351 if (!PyString_Check(obj)) {
352 if (PyUnicode_Check(obj)) {
353 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
354 if (obj == NULL)
355 return -1;
356 }
357 else {
358 PyErr_Format(PyExc_TypeError,
359 "expected string or Unicode object, "
360 "%.200s found", obj->ob_type->tp_name);
361 return -1;
362 }
363 }
364
365 *s = PyString_AS_STRING(obj);
366 if (len != NULL)
367 *len = PyString_GET_SIZE(obj);
368 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
369 PyErr_SetString(PyExc_TypeError,
370 "expected string without null bytes");
371 return -1;
372 }
373 return 0;
374}
375
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000376/* Methods */
377
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000378static int
Fred Drakeba096332000-07-09 07:04:36 +0000379string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380{
381 int i;
382 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000383 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000384 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000385 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000386 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000387 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000388 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000389
Thomas Wouters7e474022000-07-16 12:04:32 +0000390 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000391 quote = '\'';
392 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
393 quote = '"';
394
395 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000396 for (i = 0; i < op->ob_size; i++) {
397 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000398 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000399 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000400 else if (c == '\t')
401 fprintf(fp, "\\t");
402 else if (c == '\n')
403 fprintf(fp, "\\n");
404 else if (c == '\r')
405 fprintf(fp, "\\r");
406 else if (c < ' ' || c >= 0x7f)
407 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000408 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000409 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000411 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000412 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413}
414
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000416string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000417{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000418 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
419 PyObject *v;
420 if (newsize > INT_MAX) {
421 PyErr_SetString(PyExc_OverflowError,
422 "string is too large to make repr");
423 }
424 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000425 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000426 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427 }
428 else {
429 register int i;
430 register char c;
431 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000432 int quote;
433
Thomas Wouters7e474022000-07-16 12:04:32 +0000434 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000435 quote = '\'';
436 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
437 quote = '"';
438
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000440 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000441 for (i = 0; i < op->ob_size; i++) {
442 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000443 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000445 else if (c == '\t')
446 *p++ = '\\', *p++ = 't';
447 else if (c == '\n')
448 *p++ = '\\', *p++ = 'n';
449 else if (c == '\r')
450 *p++ = '\\', *p++ = 'r';
451 else if (c < ' ' || c >= 0x7f) {
452 sprintf(p, "\\x%02x", c & 0xff);
453 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 }
455 else
456 *p++ = c;
457 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000458 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000460 _PyString_Resize(
461 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000462 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000463 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000464}
465
Guido van Rossum189f1df2001-05-01 16:51:53 +0000466static PyObject *
467string_str(PyObject *s)
468{
469 Py_INCREF(s);
470 return s;
471}
472
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473static int
Fred Drakeba096332000-07-09 07:04:36 +0000474string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000475{
476 return a->ob_size;
477}
478
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000479static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000480string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481{
482 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000483 register PyStringObject *op;
484 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000485 if (PyUnicode_Check(bb))
486 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000487 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000488 "cannot add type \"%.200s\" to string",
489 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000490 return NULL;
491 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000492#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493 /* Optimize cases with empty left or right operand */
494 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000495 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000496 return bb;
497 }
498 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000499 Py_INCREF(a);
500 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000501 }
502 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000503 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000504 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000505 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000506 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000508 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000509#ifdef CACHE_HASH
510 op->ob_shash = -1;
511#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000512#ifdef INTERN_STRINGS
513 op->ob_sinterned = NULL;
514#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000515 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
516 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
517 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000518 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519#undef b
520}
521
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000522static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000523string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524{
525 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000526 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000527 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000528 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529 if (n < 0)
530 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000531 /* watch out for overflows: the size can overflow int,
532 * and the # of bytes needed can overflow size_t
533 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000535 if (n && size / n != a->ob_size) {
536 PyErr_SetString(PyExc_OverflowError,
537 "repeated string is too long");
538 return NULL;
539 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000541 Py_INCREF(a);
542 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000543 }
Tim Peters8f422462000-09-09 06:13:41 +0000544 nbytes = size * sizeof(char);
545 if (nbytes / sizeof(char) != (size_t)size ||
546 nbytes + sizeof(PyStringObject) <= nbytes) {
547 PyErr_SetString(PyExc_OverflowError,
548 "repeated string is too long");
549 return NULL;
550 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000551 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000552 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000553 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000554 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000555 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000556#ifdef CACHE_HASH
557 op->ob_shash = -1;
558#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000559#ifdef INTERN_STRINGS
560 op->ob_sinterned = NULL;
561#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000562 for (i = 0; i < size; i += a->ob_size)
563 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
564 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566}
567
568/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
569
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000570static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000571string_slice(register PyStringObject *a, register int i, register int j)
572 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573{
574 if (i < 0)
575 i = 0;
576 if (j < 0)
577 j = 0; /* Avoid signed/unsigned bug in next line */
578 if (j > a->ob_size)
579 j = a->ob_size;
580 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000581 Py_INCREF(a);
582 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 }
584 if (j < i)
585 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000587}
588
Guido van Rossum9284a572000-03-07 15:53:43 +0000589static int
Fred Drakeba096332000-07-09 07:04:36 +0000590string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000591{
592 register char *s, *end;
593 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000594 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000595 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000596 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000597 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000598 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000599 return -1;
600 }
601 c = PyString_AsString(el)[0];
602 s = PyString_AsString(a);
603 end = s + PyString_Size(a);
604 while (s < end) {
605 if (c == *s++)
606 return 1;
607 }
608 return 0;
609}
610
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000612string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000614 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000615 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000616 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000617 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000618 return NULL;
619 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000620 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000621 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000622 if (v == NULL)
623 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000624 else {
625#ifdef COUNT_ALLOCS
626 one_strings++;
627#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000628 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000629 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000630 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000631}
632
Martin v. Löwiscd353062001-05-24 16:56:35 +0000633static PyObject*
634string_richcompare(PyStringObject *a, PyStringObject *b, int op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000635{
Martin v. Löwiscd353062001-05-24 16:56:35 +0000636 int c;
637 int len_a, len_b;
638 int min_len;
639 PyObject *result;
640
641 /* One of the objects is a string object. Make sure the
642 other one is one, too. */
643 if (a->ob_type != b->ob_type) {
644 result = Py_NotImplemented;
645 goto out;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000646 }
Martin v. Löwiscd353062001-05-24 16:56:35 +0000647 if (a == b) {
648 switch (op) {
649 case Py_EQ:case Py_LE:case Py_GE:
650 result = Py_True;
651 goto out;
652 case Py_NE:case Py_LT:case Py_GT:
653 result = Py_False;
654 goto out;
655 }
656 }
657 if (op == Py_EQ) {
658 /* Supporting Py_NE here as well does not save
659 much time, since Py_NE is rarely used. */
660 if (a->ob_size == b->ob_size
661 && (a->ob_sval[0] == b->ob_sval[0]
662 && memcmp(a->ob_sval, b->ob_sval,
663 a->ob_size) == 0)) {
664 result = Py_True;
665 } else {
666 result = Py_False;
667 }
668 goto out;
669 }
670 len_a = a->ob_size; len_b = b->ob_size;
671 min_len = (len_a < len_b) ? len_a : len_b;
672 if (min_len > 0) {
673 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
674 if (c==0)
675 c = memcmp(a->ob_sval, b->ob_sval, min_len);
676 }else
677 c = 0;
678 if (c == 0)
679 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
680 switch (op) {
681 case Py_LT: c = c < 0; break;
682 case Py_LE: c = c <= 0; break;
683 case Py_EQ: assert(0); break; /* unreachable */
684 case Py_NE: c = c != 0; break;
685 case Py_GT: c = c > 0; break;
686 case Py_GE: c = c >= 0; break;
687 default:
688 result = Py_NotImplemented;
689 goto out;
690 }
691 result = c ? Py_True : Py_False;
692 out:
693 Py_INCREF(result);
694 return result;
695}
696
697int
698_PyString_Eq(PyObject *o1, PyObject *o2)
699{
700 PyStringObject *a, *b;
701 a = (PyStringObject*)o1;
702 b = (PyStringObject*)o2;
703 return a->ob_size == b->ob_size
704 && *a->ob_sval == *b->ob_sval
705 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000706}
707
Guido van Rossum9bfef441993-03-29 10:43:31 +0000708static long
Fred Drakeba096332000-07-09 07:04:36 +0000709string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000710{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000711 register int len;
712 register unsigned char *p;
713 register long x;
714
715#ifdef CACHE_HASH
716 if (a->ob_shash != -1)
717 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000718#ifdef INTERN_STRINGS
719 if (a->ob_sinterned != NULL)
720 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000721 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000722#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000723#endif
724 len = a->ob_size;
725 p = (unsigned char *) a->ob_sval;
726 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000727 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000728 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000729 x ^= a->ob_size;
730 if (x == -1)
731 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000732#ifdef CACHE_HASH
733 a->ob_shash = x;
734#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000735 return x;
736}
737
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000738static int
Fred Drakeba096332000-07-09 07:04:36 +0000739string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000740{
741 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000742 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000743 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000744 return -1;
745 }
746 *ptr = (void *)self->ob_sval;
747 return self->ob_size;
748}
749
750static int
Fred Drakeba096332000-07-09 07:04:36 +0000751string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000752{
Guido van Rossum045e6881997-09-08 18:30:11 +0000753 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000754 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000755 return -1;
756}
757
758static int
Fred Drakeba096332000-07-09 07:04:36 +0000759string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000760{
761 if ( lenp )
762 *lenp = self->ob_size;
763 return 1;
764}
765
Guido van Rossum1db70701998-10-08 02:18:52 +0000766static int
Fred Drakeba096332000-07-09 07:04:36 +0000767string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000768{
769 if ( index != 0 ) {
770 PyErr_SetString(PyExc_SystemError,
771 "accessing non-existent string segment");
772 return -1;
773 }
774 *ptr = self->ob_sval;
775 return self->ob_size;
776}
777
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000778static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000779 (inquiry)string_length, /*sq_length*/
780 (binaryfunc)string_concat, /*sq_concat*/
781 (intargfunc)string_repeat, /*sq_repeat*/
782 (intargfunc)string_item, /*sq_item*/
783 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000784 0, /*sq_ass_item*/
785 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000786 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000787};
788
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000789static PyBufferProcs string_as_buffer = {
790 (getreadbufferproc)string_buffer_getreadbuf,
791 (getwritebufferproc)string_buffer_getwritebuf,
792 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000793 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000794};
795
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000796
797
798#define LEFTSTRIP 0
799#define RIGHTSTRIP 1
800#define BOTHSTRIP 2
801
802
803static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000804split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000805{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000806 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000807 PyObject* item;
808 PyObject *list = PyList_New(0);
809
810 if (list == NULL)
811 return NULL;
812
Guido van Rossum4c08d552000-03-10 22:55:18 +0000813 for (i = j = 0; i < len; ) {
814 while (i < len && isspace(Py_CHARMASK(s[i])))
815 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000816 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000817 while (i < len && !isspace(Py_CHARMASK(s[i])))
818 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000819 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000820 if (maxsplit-- <= 0)
821 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000822 item = PyString_FromStringAndSize(s+j, (int)(i-j));
823 if (item == NULL)
824 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 err = PyList_Append(list, item);
826 Py_DECREF(item);
827 if (err < 0)
828 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000829 while (i < len && isspace(Py_CHARMASK(s[i])))
830 i++;
831 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000832 }
833 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000834 if (j < len) {
835 item = PyString_FromStringAndSize(s+j, (int)(len - j));
836 if (item == NULL)
837 goto finally;
838 err = PyList_Append(list, item);
839 Py_DECREF(item);
840 if (err < 0)
841 goto finally;
842 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000843 return list;
844 finally:
845 Py_DECREF(list);
846 return NULL;
847}
848
849
850static char split__doc__[] =
851"S.split([sep [,maxsplit]]) -> list of strings\n\
852\n\
853Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000854delimiter string. If maxsplit is given, at most maxsplit\n\
855splits are done. If sep is not specified, any whitespace string\n\
856is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000857
858static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000859string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000860{
861 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000862 int maxsplit = -1;
863 const char *s = PyString_AS_STRING(self), *sub;
864 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000865
Guido van Rossum4c08d552000-03-10 22:55:18 +0000866 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000867 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000868 if (maxsplit < 0)
869 maxsplit = INT_MAX;
870 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000871 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000872 if (PyString_Check(subobj)) {
873 sub = PyString_AS_STRING(subobj);
874 n = PyString_GET_SIZE(subobj);
875 }
876 else if (PyUnicode_Check(subobj))
877 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
878 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
879 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000880 if (n == 0) {
881 PyErr_SetString(PyExc_ValueError, "empty separator");
882 return NULL;
883 }
884
885 list = PyList_New(0);
886 if (list == NULL)
887 return NULL;
888
889 i = j = 0;
890 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000891 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000892 if (maxsplit-- <= 0)
893 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000894 item = PyString_FromStringAndSize(s+j, (int)(i-j));
895 if (item == NULL)
896 goto fail;
897 err = PyList_Append(list, item);
898 Py_DECREF(item);
899 if (err < 0)
900 goto fail;
901 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000902 }
903 else
904 i++;
905 }
906 item = PyString_FromStringAndSize(s+j, (int)(len-j));
907 if (item == NULL)
908 goto fail;
909 err = PyList_Append(list, item);
910 Py_DECREF(item);
911 if (err < 0)
912 goto fail;
913
914 return list;
915
916 fail:
917 Py_DECREF(list);
918 return NULL;
919}
920
921
922static char join__doc__[] =
923"S.join(sequence) -> string\n\
924\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000925Return a string which is the concatenation of the strings in the\n\
926sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000927
928static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000929string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000930{
931 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000932 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000933 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000934 char *p;
935 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000936 size_t sz = 0;
937 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000938 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000939
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000940 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000941 return NULL;
942
Tim Peters19fe14e2001-01-19 03:03:47 +0000943 seq = PySequence_Fast(orig, "");
944 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000945 if (PyErr_ExceptionMatches(PyExc_TypeError))
946 PyErr_Format(PyExc_TypeError,
947 "sequence expected, %.80s found",
948 orig->ob_type->tp_name);
949 return NULL;
950 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000951
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000952 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000953 if (seqlen == 0) {
954 Py_DECREF(seq);
955 return PyString_FromString("");
956 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000957 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000958 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000959 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
960 PyErr_Format(PyExc_TypeError,
961 "sequence item 0: expected string,"
962 " %.80s found",
963 item->ob_type->tp_name);
964 Py_DECREF(seq);
965 return NULL;
966 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000967 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000968 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000969 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000970 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000971
Tim Peters19fe14e2001-01-19 03:03:47 +0000972 /* There are at least two things to join. Do a pre-pass to figure out
973 * the total amount of space we'll need (sz), see whether any argument
974 * is absurd, and defer to the Unicode join if appropriate.
975 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000976 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000977 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000978 item = PySequence_Fast_GET_ITEM(seq, i);
979 if (!PyString_Check(item)){
980 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000981 /* Defer to Unicode join.
982 * CAUTION: There's no gurantee that the
983 * original sequence can be iterated over
984 * again, so we must pass seq here.
985 */
986 PyObject *result;
987 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000988 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +0000989 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000990 }
991 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000992 "sequence item %i: expected string,"
993 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000994 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000995 Py_DECREF(seq);
996 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000997 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000998 sz += PyString_GET_SIZE(item);
999 if (i != 0)
1000 sz += seplen;
1001 if (sz < old_sz || sz > INT_MAX) {
1002 PyErr_SetString(PyExc_OverflowError,
1003 "join() is too long for a Python string");
1004 Py_DECREF(seq);
1005 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001007 }
1008
1009 /* Allocate result space. */
1010 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
1011 if (res == NULL) {
1012 Py_DECREF(seq);
1013 return NULL;
1014 }
1015
1016 /* Catenate everything. */
1017 p = PyString_AS_STRING(res);
1018 for (i = 0; i < seqlen; ++i) {
1019 size_t n;
1020 item = PySequence_Fast_GET_ITEM(seq, i);
1021 n = PyString_GET_SIZE(item);
1022 memcpy(p, PyString_AS_STRING(item), n);
1023 p += n;
1024 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001025 memcpy(p, sep, seplen);
1026 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +00001027 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028 }
Tim Peters19fe14e2001-01-19 03:03:47 +00001029
Jeremy Hylton49048292000-07-11 03:28:17 +00001030 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001031 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001032}
1033
Tim Peters52e155e2001-06-16 05:42:57 +00001034PyObject *
1035_PyString_Join(PyObject *sep, PyObject *x)
Tim Petersa7259592001-06-16 05:11:17 +00001036{
1037 PyObject* args;
1038 PyObject* result = NULL;
1039
1040 assert(sep != NULL && PyString_Check(sep));
1041 assert(x != NULL);
1042 args = PyTuple_New(1);
1043 if (args != NULL) {
1044 Py_INCREF(x);
1045 PyTuple_SET_ITEM(args, 0, x);
1046 result = string_join((PyStringObject *)sep, args);
1047 Py_DECREF(args);
1048 }
1049 return result;
1050}
1051
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001052static long
Fred Drakeba096332000-07-09 07:04:36 +00001053string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001055 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001056 int len = PyString_GET_SIZE(self);
1057 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001058 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001059
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001060 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +00001061 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001062 return -2;
1063 if (PyString_Check(subobj)) {
1064 sub = PyString_AS_STRING(subobj);
1065 n = PyString_GET_SIZE(subobj);
1066 }
1067 else if (PyUnicode_Check(subobj))
1068 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
1069 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001070 return -2;
1071
1072 if (last > len)
1073 last = len;
1074 if (last < 0)
1075 last += len;
1076 if (last < 0)
1077 last = 0;
1078 if (i < 0)
1079 i += len;
1080 if (i < 0)
1081 i = 0;
1082
Guido van Rossum4c08d552000-03-10 22:55:18 +00001083 if (dir > 0) {
1084 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001085 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001086 last -= n;
1087 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001088 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001089 return (long)i;
1090 }
1091 else {
1092 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001093
Guido van Rossum4c08d552000-03-10 22:55:18 +00001094 if (n == 0 && i <= last)
1095 return (long)last;
1096 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001097 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001098 return (long)j;
1099 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001100
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001101 return -1;
1102}
1103
1104
1105static char find__doc__[] =
1106"S.find(sub [,start [,end]]) -> int\n\
1107\n\
1108Return the lowest index in S where substring sub is found,\n\
1109such that sub is contained within s[start,end]. Optional\n\
1110arguments start and end are interpreted as in slice notation.\n\
1111\n\
1112Return -1 on failure.";
1113
1114static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001115string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001116{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001117 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001118 if (result == -2)
1119 return NULL;
1120 return PyInt_FromLong(result);
1121}
1122
1123
1124static char index__doc__[] =
1125"S.index(sub [,start [,end]]) -> int\n\
1126\n\
1127Like S.find() but raise ValueError when the substring is not found.";
1128
1129static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001130string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001131{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001132 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001133 if (result == -2)
1134 return NULL;
1135 if (result == -1) {
1136 PyErr_SetString(PyExc_ValueError,
1137 "substring not found in string.index");
1138 return NULL;
1139 }
1140 return PyInt_FromLong(result);
1141}
1142
1143
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001144static char rfind__doc__[] =
1145"S.rfind(sub [,start [,end]]) -> int\n\
1146\n\
1147Return the highest index in S where substring sub is found,\n\
1148such that sub is contained within s[start,end]. Optional\n\
1149arguments start and end are interpreted as in slice notation.\n\
1150\n\
1151Return -1 on failure.";
1152
1153static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001154string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001155{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001156 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001157 if (result == -2)
1158 return NULL;
1159 return PyInt_FromLong(result);
1160}
1161
1162
1163static char rindex__doc__[] =
1164"S.rindex(sub [,start [,end]]) -> int\n\
1165\n\
1166Like S.rfind() but raise ValueError when the substring is not found.";
1167
1168static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001169string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001170{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001171 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001172 if (result == -2)
1173 return NULL;
1174 if (result == -1) {
1175 PyErr_SetString(PyExc_ValueError,
1176 "substring not found in string.rindex");
1177 return NULL;
1178 }
1179 return PyInt_FromLong(result);
1180}
1181
1182
1183static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001184do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001185{
1186 char *s = PyString_AS_STRING(self);
1187 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001188
Guido van Rossum43713e52000-02-29 13:59:29 +00001189 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001190 return NULL;
1191
1192 i = 0;
1193 if (striptype != RIGHTSTRIP) {
1194 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1195 i++;
1196 }
1197 }
1198
1199 j = len;
1200 if (striptype != LEFTSTRIP) {
1201 do {
1202 j--;
1203 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1204 j++;
1205 }
1206
1207 if (i == 0 && j == len) {
1208 Py_INCREF(self);
1209 return (PyObject*)self;
1210 }
1211 else
1212 return PyString_FromStringAndSize(s+i, j-i);
1213}
1214
1215
1216static char strip__doc__[] =
1217"S.strip() -> string\n\
1218\n\
1219Return a copy of the string S with leading and trailing\n\
1220whitespace removed.";
1221
1222static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001223string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001224{
1225 return do_strip(self, args, BOTHSTRIP);
1226}
1227
1228
1229static char lstrip__doc__[] =
1230"S.lstrip() -> string\n\
1231\n\
1232Return a copy of the string S with leading whitespace removed.";
1233
1234static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001235string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001236{
1237 return do_strip(self, args, LEFTSTRIP);
1238}
1239
1240
1241static char rstrip__doc__[] =
1242"S.rstrip() -> string\n\
1243\n\
1244Return a copy of the string S with trailing whitespace removed.";
1245
1246static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001247string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001248{
1249 return do_strip(self, args, RIGHTSTRIP);
1250}
1251
1252
1253static char lower__doc__[] =
1254"S.lower() -> string\n\
1255\n\
1256Return a copy of the string S converted to lowercase.";
1257
1258static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001259string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001260{
1261 char *s = PyString_AS_STRING(self), *s_new;
1262 int i, n = PyString_GET_SIZE(self);
1263 PyObject *new;
1264
Guido van Rossum43713e52000-02-29 13:59:29 +00001265 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001266 return NULL;
1267 new = PyString_FromStringAndSize(NULL, n);
1268 if (new == NULL)
1269 return NULL;
1270 s_new = PyString_AsString(new);
1271 for (i = 0; i < n; i++) {
1272 int c = Py_CHARMASK(*s++);
1273 if (isupper(c)) {
1274 *s_new = tolower(c);
1275 } else
1276 *s_new = c;
1277 s_new++;
1278 }
1279 return new;
1280}
1281
1282
1283static char upper__doc__[] =
1284"S.upper() -> string\n\
1285\n\
1286Return a copy of the string S converted to uppercase.";
1287
1288static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001289string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001290{
1291 char *s = PyString_AS_STRING(self), *s_new;
1292 int i, n = PyString_GET_SIZE(self);
1293 PyObject *new;
1294
Guido van Rossum43713e52000-02-29 13:59:29 +00001295 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001296 return NULL;
1297 new = PyString_FromStringAndSize(NULL, n);
1298 if (new == NULL)
1299 return NULL;
1300 s_new = PyString_AsString(new);
1301 for (i = 0; i < n; i++) {
1302 int c = Py_CHARMASK(*s++);
1303 if (islower(c)) {
1304 *s_new = toupper(c);
1305 } else
1306 *s_new = c;
1307 s_new++;
1308 }
1309 return new;
1310}
1311
1312
Guido van Rossum4c08d552000-03-10 22:55:18 +00001313static char title__doc__[] =
1314"S.title() -> string\n\
1315\n\
1316Return a titlecased version of S, i.e. words start with uppercase\n\
1317characters, all remaining cased characters have lowercase.";
1318
1319static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001320string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001321{
1322 char *s = PyString_AS_STRING(self), *s_new;
1323 int i, n = PyString_GET_SIZE(self);
1324 int previous_is_cased = 0;
1325 PyObject *new;
1326
1327 if (!PyArg_ParseTuple(args, ":title"))
1328 return NULL;
1329 new = PyString_FromStringAndSize(NULL, n);
1330 if (new == NULL)
1331 return NULL;
1332 s_new = PyString_AsString(new);
1333 for (i = 0; i < n; i++) {
1334 int c = Py_CHARMASK(*s++);
1335 if (islower(c)) {
1336 if (!previous_is_cased)
1337 c = toupper(c);
1338 previous_is_cased = 1;
1339 } else if (isupper(c)) {
1340 if (previous_is_cased)
1341 c = tolower(c);
1342 previous_is_cased = 1;
1343 } else
1344 previous_is_cased = 0;
1345 *s_new++ = c;
1346 }
1347 return new;
1348}
1349
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001350static char capitalize__doc__[] =
1351"S.capitalize() -> string\n\
1352\n\
1353Return a copy of the string S with only its first character\n\
1354capitalized.";
1355
1356static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001357string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001358{
1359 char *s = PyString_AS_STRING(self), *s_new;
1360 int i, n = PyString_GET_SIZE(self);
1361 PyObject *new;
1362
Guido van Rossum43713e52000-02-29 13:59:29 +00001363 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364 return NULL;
1365 new = PyString_FromStringAndSize(NULL, n);
1366 if (new == NULL)
1367 return NULL;
1368 s_new = PyString_AsString(new);
1369 if (0 < n) {
1370 int c = Py_CHARMASK(*s++);
1371 if (islower(c))
1372 *s_new = toupper(c);
1373 else
1374 *s_new = c;
1375 s_new++;
1376 }
1377 for (i = 1; i < n; i++) {
1378 int c = Py_CHARMASK(*s++);
1379 if (isupper(c))
1380 *s_new = tolower(c);
1381 else
1382 *s_new = c;
1383 s_new++;
1384 }
1385 return new;
1386}
1387
1388
1389static char count__doc__[] =
1390"S.count(sub[, start[, end]]) -> int\n\
1391\n\
1392Return the number of occurrences of substring sub in string\n\
1393S[start:end]. Optional arguments start and end are\n\
1394interpreted as in slice notation.";
1395
1396static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001397string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001398{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001399 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 int len = PyString_GET_SIZE(self), n;
1401 int i = 0, last = INT_MAX;
1402 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001403 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001404
Guido van Rossumc6821402000-05-08 14:08:05 +00001405 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1406 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001407 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001408
Guido van Rossum4c08d552000-03-10 22:55:18 +00001409 if (PyString_Check(subobj)) {
1410 sub = PyString_AS_STRING(subobj);
1411 n = PyString_GET_SIZE(subobj);
1412 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001413 else if (PyUnicode_Check(subobj)) {
1414 int count;
1415 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1416 if (count == -1)
1417 return NULL;
1418 else
1419 return PyInt_FromLong((long) count);
1420 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001421 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1422 return NULL;
1423
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424 if (last > len)
1425 last = len;
1426 if (last < 0)
1427 last += len;
1428 if (last < 0)
1429 last = 0;
1430 if (i < 0)
1431 i += len;
1432 if (i < 0)
1433 i = 0;
1434 m = last + 1 - n;
1435 if (n == 0)
1436 return PyInt_FromLong((long) (m-i));
1437
1438 r = 0;
1439 while (i < m) {
1440 if (!memcmp(s+i, sub, n)) {
1441 r++;
1442 i += n;
1443 } else {
1444 i++;
1445 }
1446 }
1447 return PyInt_FromLong((long) r);
1448}
1449
1450
1451static char swapcase__doc__[] =
1452"S.swapcase() -> string\n\
1453\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001454Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001455converted to lowercase and vice versa.";
1456
1457static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001458string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001459{
1460 char *s = PyString_AS_STRING(self), *s_new;
1461 int i, n = PyString_GET_SIZE(self);
1462 PyObject *new;
1463
Guido van Rossum43713e52000-02-29 13:59:29 +00001464 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001465 return NULL;
1466 new = PyString_FromStringAndSize(NULL, n);
1467 if (new == NULL)
1468 return NULL;
1469 s_new = PyString_AsString(new);
1470 for (i = 0; i < n; i++) {
1471 int c = Py_CHARMASK(*s++);
1472 if (islower(c)) {
1473 *s_new = toupper(c);
1474 }
1475 else if (isupper(c)) {
1476 *s_new = tolower(c);
1477 }
1478 else
1479 *s_new = c;
1480 s_new++;
1481 }
1482 return new;
1483}
1484
1485
1486static char translate__doc__[] =
1487"S.translate(table [,deletechars]) -> string\n\
1488\n\
1489Return a copy of the string S, where all characters occurring\n\
1490in the optional argument deletechars are removed, and the\n\
1491remaining characters have been mapped through the given\n\
1492translation table, which must be a string of length 256.";
1493
1494static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001495string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001496{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001497 register char *input, *output;
1498 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001499 register int i, c, changed = 0;
1500 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001501 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001502 int inlen, tablen, dellen = 0;
1503 PyObject *result;
1504 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001505 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001506
Guido van Rossum4c08d552000-03-10 22:55:18 +00001507 if (!PyArg_ParseTuple(args, "O|O:translate",
1508 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001509 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001510
1511 if (PyString_Check(tableobj)) {
1512 table1 = PyString_AS_STRING(tableobj);
1513 tablen = PyString_GET_SIZE(tableobj);
1514 }
1515 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001516 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001517 parameter; instead a mapping to None will cause characters
1518 to be deleted. */
1519 if (delobj != NULL) {
1520 PyErr_SetString(PyExc_TypeError,
1521 "deletions are implemented differently for unicode");
1522 return NULL;
1523 }
1524 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1525 }
1526 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001527 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001528
1529 if (delobj != NULL) {
1530 if (PyString_Check(delobj)) {
1531 del_table = PyString_AS_STRING(delobj);
1532 dellen = PyString_GET_SIZE(delobj);
1533 }
1534 else if (PyUnicode_Check(delobj)) {
1535 PyErr_SetString(PyExc_TypeError,
1536 "deletions are implemented differently for unicode");
1537 return NULL;
1538 }
1539 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1540 return NULL;
1541
1542 if (tablen != 256) {
1543 PyErr_SetString(PyExc_ValueError,
1544 "translation table must be 256 characters long");
1545 return NULL;
1546 }
1547 }
1548 else {
1549 del_table = NULL;
1550 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001551 }
1552
1553 table = table1;
1554 inlen = PyString_Size(input_obj);
1555 result = PyString_FromStringAndSize((char *)NULL, inlen);
1556 if (result == NULL)
1557 return NULL;
1558 output_start = output = PyString_AsString(result);
1559 input = PyString_AsString(input_obj);
1560
1561 if (dellen == 0) {
1562 /* If no deletions are required, use faster code */
1563 for (i = inlen; --i >= 0; ) {
1564 c = Py_CHARMASK(*input++);
1565 if (Py_CHARMASK((*output++ = table[c])) != c)
1566 changed = 1;
1567 }
1568 if (changed)
1569 return result;
1570 Py_DECREF(result);
1571 Py_INCREF(input_obj);
1572 return input_obj;
1573 }
1574
1575 for (i = 0; i < 256; i++)
1576 trans_table[i] = Py_CHARMASK(table[i]);
1577
1578 for (i = 0; i < dellen; i++)
1579 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1580
1581 for (i = inlen; --i >= 0; ) {
1582 c = Py_CHARMASK(*input++);
1583 if (trans_table[c] != -1)
1584 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1585 continue;
1586 changed = 1;
1587 }
1588 if (!changed) {
1589 Py_DECREF(result);
1590 Py_INCREF(input_obj);
1591 return input_obj;
1592 }
1593 /* Fix the size of the resulting string */
1594 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1595 return NULL;
1596 return result;
1597}
1598
1599
1600/* What follows is used for implementing replace(). Perry Stoll. */
1601
1602/*
1603 mymemfind
1604
1605 strstr replacement for arbitrary blocks of memory.
1606
Barry Warsaw51ac5802000-03-20 16:36:48 +00001607 Locates the first occurrence in the memory pointed to by MEM of the
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001608 contents of memory pointed to by PAT. Returns the index into MEM if
1609 found, or -1 if not found. If len of PAT is greater than length of
1610 MEM, the function returns -1.
1611*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
	/* A match cannot begin within the final pat_len-1 bytes. */
	const int last_start = len - pat_len;
	register int pos;

	for (pos = 0; pos <= last_start; pos++) {
		if (mem[pos] != pat[0])
			continue;	/* cheap first-byte test before memcmp */
		if (memcmp(mem + pos, pat, pat_len) == 0)
			return pos;
	}
	return -1;
}
1627
1628/*
1629 mymemcnt
1630
1631 Return the number of distinct times PAT is found in MEM.
1632 meaning mem=1111 and pat==11 returns 2.
1633 mem=11111 and pat==11 also return 2.
1634 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
	int count = 0;

	while (len >= 0) {
		const int hit = mymemfind(mem, len, pat, pat_len);
		int consumed;

		if (hit == -1)
			break;
		/* Resume just past this match so that matches never
		   overlap. */
		consumed = hit + pat_len;
		mem += consumed;
		len -= consumed;
		count++;
	}
	return count;
}
1651
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences, taken from the left,
   are replaced; a negative COUNT means "replace every occurrence".

   If STR is empty, PAT is longer than STR, or no replacement would be
   made (no occurrences of PAT, or COUNT is 0), then the original
   string is returned.  Otherwise, a new string is allocated here with
   PyMem_MALLOC and returned; the caller is responsible for freeing it.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would be
       too long for its length to be represented as an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,		/* input string */
             const char *pat, int pat_len,	/* pattern string to find */
             const char *sub, int sub_len,	/* substitution string */
             int count,				/* number of replacements */
             int *out_len)
{
	char *out_s;
	char *new_s;
	int nfound, offset, new_len;

	if (len == 0 || pat_len > len)
		goto return_same;

	/* find length of output string */
	nfound = mymemcnt(str, len, pat, pat_len);
	if (count < 0)
		count = INT_MAX;
	else if (nfound > count)
		nfound = count;
	if (nfound == 0)
		goto return_same;

	/* Each replacement changes the length by sub_len - pat_len.  Only
	   a growing result can exceed INT_MAX; refuse it here, because a
	   wrapped-around new_len would under-allocate the buffer that the
	   memcpy() calls below fill in. */
	if (sub_len > pat_len &&
	    nfound > (INT_MAX - len) / (sub_len - pat_len))
		return NULL;

	new_len = len + nfound*(sub_len - pat_len);
	if (new_len == 0) {
		/* Have to allocate something for the caller to free(). */
		out_s = (char *)PyMem_MALLOC(1);
		if (out_s == NULL)
			return NULL;
		out_s[0] = '\0';
	}
	else {
		assert(new_len > 0);
		new_s = (char *)PyMem_MALLOC(new_len);
		if (new_s == NULL)
			return NULL;
		out_s = new_s;

		for (; count > 0 && len > 0; --count) {
			/* find index of next instance of pattern */
			offset = mymemfind(str, len, pat, pat_len);
			if (offset == -1)
				break;

			/* copy non matching part of input string */
			memcpy(new_s, str, offset);
			str += offset + pat_len;
			len -= offset + pat_len;

			/* copy substitute into the output string */
			new_s += offset;
			memcpy(new_s, sub, sub_len);
			new_s += sub_len;
		}
		/* copy any remaining values into output string */
		if (len > 0)
			memcpy(new_s, str, len);
	}
	*out_len = new_len;
	return out_s;

  return_same:
	*out_len = -1;
	return (char *)str; /* cast away const */
}
1736
1737
1738static char replace__doc__[] =
1739"S.replace (old, new[, maxsplit]) -> string\n\
1740\n\
1741Return a copy of string S with all occurrences of substring\n\
1742old replaced by new. If the optional argument maxsplit is\n\
1743given, only the first maxsplit occurrences are replaced.";
1744
1745static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001746string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001747{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 const char *str = PyString_AS_STRING(self), *sub, *repl;
1749 char *new_s;
1750 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1751 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001753 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001754
Guido van Rossum4c08d552000-03-10 22:55:18 +00001755 if (!PyArg_ParseTuple(args, "OO|i:replace",
1756 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001757 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001758
1759 if (PyString_Check(subobj)) {
1760 sub = PyString_AS_STRING(subobj);
1761 sub_len = PyString_GET_SIZE(subobj);
1762 }
1763 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001764 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001765 subobj, replobj, count);
1766 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1767 return NULL;
1768
1769 if (PyString_Check(replobj)) {
1770 repl = PyString_AS_STRING(replobj);
1771 repl_len = PyString_GET_SIZE(replobj);
1772 }
1773 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001774 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001775 subobj, replobj, count);
1776 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1777 return NULL;
1778
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001779 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001780 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001781 return NULL;
1782 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001783 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001784 if (new_s == NULL) {
1785 PyErr_NoMemory();
1786 return NULL;
1787 }
1788 if (out_len == -1) {
1789 /* we're returning another reference to self */
1790 new = (PyObject*)self;
1791 Py_INCREF(new);
1792 }
1793 else {
1794 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001795 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001796 }
1797 return new;
1798}
1799
1800
1801static char startswith__doc__[] =
1802"S.startswith(prefix[, start[, end]]) -> int\n\
1803\n\
1804Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1805optional start, test S beginning at that position. With optional end, stop\n\
1806comparing S at that position.";
1807
1808static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001809string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001810{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001811 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001812 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001813 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001814 int plen;
1815 int start = 0;
1816 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001817 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001818
Guido van Rossumc6821402000-05-08 14:08:05 +00001819 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1820 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001821 return NULL;
1822 if (PyString_Check(subobj)) {
1823 prefix = PyString_AS_STRING(subobj);
1824 plen = PyString_GET_SIZE(subobj);
1825 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001826 else if (PyUnicode_Check(subobj)) {
1827 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001828 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001829 subobj, start, end, -1);
1830 if (rc == -1)
1831 return NULL;
1832 else
1833 return PyInt_FromLong((long) rc);
1834 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001835 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001836 return NULL;
1837
1838 /* adopt Java semantics for index out of range. it is legal for
1839 * offset to be == plen, but this only returns true if prefix is
1840 * the empty string.
1841 */
1842 if (start < 0 || start+plen > len)
1843 return PyInt_FromLong(0);
1844
1845 if (!memcmp(str+start, prefix, plen)) {
1846 /* did the match end after the specified end? */
1847 if (end < 0)
1848 return PyInt_FromLong(1);
1849 else if (end - start < plen)
1850 return PyInt_FromLong(0);
1851 else
1852 return PyInt_FromLong(1);
1853 }
1854 else return PyInt_FromLong(0);
1855}
1856
1857
1858static char endswith__doc__[] =
1859"S.endswith(suffix[, start[, end]]) -> int\n\
1860\n\
1861Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1862optional start, test S beginning at that position. With optional end, stop\n\
1863comparing S at that position.";
1864
1865static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001866string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001867{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001868 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001869 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001870 const char* suffix;
1871 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001872 int start = 0;
1873 int end = -1;
1874 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001875 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001876
Guido van Rossumc6821402000-05-08 14:08:05 +00001877 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1878 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001879 return NULL;
1880 if (PyString_Check(subobj)) {
1881 suffix = PyString_AS_STRING(subobj);
1882 slen = PyString_GET_SIZE(subobj);
1883 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001884 else if (PyUnicode_Check(subobj)) {
1885 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001886 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001887 subobj, start, end, +1);
1888 if (rc == -1)
1889 return NULL;
1890 else
1891 return PyInt_FromLong((long) rc);
1892 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001893 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001894 return NULL;
1895
Guido van Rossum4c08d552000-03-10 22:55:18 +00001896 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001897 return PyInt_FromLong(0);
1898
1899 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001900 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001901
Guido van Rossum4c08d552000-03-10 22:55:18 +00001902 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001903 return PyInt_FromLong(1);
1904 else return PyInt_FromLong(0);
1905}
1906
1907
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001908static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001909"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001910\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001911Encodes S using the codec registered for encoding. encoding defaults\n\
1912to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001913handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1914a ValueError. Other possible values are 'ignore' and 'replace'.";
1915
1916static PyObject *
1917string_encode(PyStringObject *self, PyObject *args)
1918{
1919 char *encoding = NULL;
1920 char *errors = NULL;
1921 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1922 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001923 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
1924}
1925
1926
1927static char decode__doc__[] =
1928"S.decode([encoding[,errors]]) -> object\n\
1929\n\
1930Decodes S using the codec registered for encoding. encoding defaults\n\
1931to the default encoding. errors may be given to set a different error\n\
1932handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1933a ValueError. Other possible values are 'ignore' and 'replace'.";
1934
1935static PyObject *
1936string_decode(PyStringObject *self, PyObject *args)
1937{
1938 char *encoding = NULL;
1939 char *errors = NULL;
1940 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
1941 return NULL;
1942 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001943}
1944
1945
Guido van Rossum4c08d552000-03-10 22:55:18 +00001946static char expandtabs__doc__[] =
1947"S.expandtabs([tabsize]) -> string\n\
1948\n\
1949Return a copy of S where all tab characters are expanded using spaces.\n\
1950If tabsize is not given, a tab size of 8 characters is assumed.";
1951
1952static PyObject*
1953string_expandtabs(PyStringObject *self, PyObject *args)
1954{
1955 const char *e, *p;
1956 char *q;
1957 int i, j;
1958 PyObject *u;
1959 int tabsize = 8;
1960
1961 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1962 return NULL;
1963
Thomas Wouters7e474022000-07-16 12:04:32 +00001964 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001965 i = j = 0;
1966 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1967 for (p = PyString_AS_STRING(self); p < e; p++)
1968 if (*p == '\t') {
1969 if (tabsize > 0)
1970 j += tabsize - (j % tabsize);
1971 }
1972 else {
1973 j++;
1974 if (*p == '\n' || *p == '\r') {
1975 i += j;
1976 j = 0;
1977 }
1978 }
1979
1980 /* Second pass: create output string and fill it */
1981 u = PyString_FromStringAndSize(NULL, i + j);
1982 if (!u)
1983 return NULL;
1984
1985 j = 0;
1986 q = PyString_AS_STRING(u);
1987
1988 for (p = PyString_AS_STRING(self); p < e; p++)
1989 if (*p == '\t') {
1990 if (tabsize > 0) {
1991 i = tabsize - (j % tabsize);
1992 j += i;
1993 while (i--)
1994 *q++ = ' ';
1995 }
1996 }
1997 else {
1998 j++;
1999 *q++ = *p;
2000 if (*p == '\n' || *p == '\r')
2001 j = 0;
2002 }
2003
2004 return u;
2005}
2006
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002007static
2008PyObject *pad(PyStringObject *self,
2009 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002010 int right,
2011 char fill)
2012{
2013 PyObject *u;
2014
2015 if (left < 0)
2016 left = 0;
2017 if (right < 0)
2018 right = 0;
2019
2020 if (left == 0 && right == 0) {
2021 Py_INCREF(self);
2022 return (PyObject *)self;
2023 }
2024
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002025 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00002026 left + PyString_GET_SIZE(self) + right);
2027 if (u) {
2028 if (left)
2029 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002030 memcpy(PyString_AS_STRING(u) + left,
2031 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00002032 PyString_GET_SIZE(self));
2033 if (right)
2034 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
2035 fill, right);
2036 }
2037
2038 return u;
2039}
2040
2041static char ljust__doc__[] =
2042"S.ljust(width) -> string\n\
2043\n\
2044Return S left justified in a string of length width. Padding is\n\
2045done using spaces.";
2046
2047static PyObject *
2048string_ljust(PyStringObject *self, PyObject *args)
2049{
2050 int width;
2051 if (!PyArg_ParseTuple(args, "i:ljust", &width))
2052 return NULL;
2053
2054 if (PyString_GET_SIZE(self) >= width) {
2055 Py_INCREF(self);
2056 return (PyObject*) self;
2057 }
2058
2059 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
2060}
2061
2062
2063static char rjust__doc__[] =
2064"S.rjust(width) -> string\n\
2065\n\
2066Return S right justified in a string of length width. Padding is\n\
2067done using spaces.";
2068
2069static PyObject *
2070string_rjust(PyStringObject *self, PyObject *args)
2071{
2072 int width;
2073 if (!PyArg_ParseTuple(args, "i:rjust", &width))
2074 return NULL;
2075
2076 if (PyString_GET_SIZE(self) >= width) {
2077 Py_INCREF(self);
2078 return (PyObject*) self;
2079 }
2080
2081 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2082}
2083
2084
2085static char center__doc__[] =
2086"S.center(width) -> string\n\
2087\n\
2088Return S centered in a string of length width. Padding is done\n\
2089using spaces.";
2090
2091static PyObject *
2092string_center(PyStringObject *self, PyObject *args)
2093{
2094 int marg, left;
2095 int width;
2096
2097 if (!PyArg_ParseTuple(args, "i:center", &width))
2098 return NULL;
2099
2100 if (PyString_GET_SIZE(self) >= width) {
2101 Py_INCREF(self);
2102 return (PyObject*) self;
2103 }
2104
2105 marg = width - PyString_GET_SIZE(self);
2106 left = marg / 2 + (marg & width & 1);
2107
2108 return pad(self, left, marg - left, ' ');
2109}
2110
#if 0
/* Disabled: everything up to the matching #endif is compiled out. */
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* str.zfill() implementation.

   args holds the int width.  When S is already at least that long a
   new reference to S is returned; otherwise pad() prepends '0'
   characters and a leading '+' or '-' of S is moved in front of
   them.  Returns NULL if argument parsing or pad() fails. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width, size, zeros;
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	size = PyString_GET_SIZE(self);
	if (width <= size) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	zeros = width - size;
	padded = pad(self, zeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[zeros] is where the first character of S landed. */
	buf = PyString_AS_STRING(padded);
	switch (buf[zeros]) {
	case '+':
	case '-':
		/* move sign to beginning of string */
		buf[0] = buf[zeros];
		buf[zeros] = '0';
		break;
	default:
		break;
	}

	return padded;
}
#endif
2150
2151static char isspace__doc__[] =
2152"S.isspace() -> int\n\
2153\n\
2154Return 1 if there are only whitespace characters in S,\n\
21550 otherwise.";
2156
2157static PyObject*
2158string_isspace(PyStringObject *self, PyObject *args)
2159{
Fred Drakeba096332000-07-09 07:04:36 +00002160 register const unsigned char *p
2161 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002162 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002163
2164 if (!PyArg_NoArgs(args))
2165 return NULL;
2166
2167 /* Shortcut for single character strings */
2168 if (PyString_GET_SIZE(self) == 1 &&
2169 isspace(*p))
2170 return PyInt_FromLong(1);
2171
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002172 /* Special case for empty strings */
2173 if (PyString_GET_SIZE(self) == 0)
2174 return PyInt_FromLong(0);
2175
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176 e = p + PyString_GET_SIZE(self);
2177 for (; p < e; p++) {
2178 if (!isspace(*p))
2179 return PyInt_FromLong(0);
2180 }
2181 return PyInt_FromLong(1);
2182}
2183
2184
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002185static char isalpha__doc__[] =
2186"S.isalpha() -> int\n\
2187\n\
2188Return 1 if all characters in S are alphabetic\n\
2189and there is at least one character in S, 0 otherwise.";
2190
2191static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002192string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002193{
Fred Drakeba096332000-07-09 07:04:36 +00002194 register const unsigned char *p
2195 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002196 register const unsigned char *e;
2197
2198 if (!PyArg_NoArgs(args))
2199 return NULL;
2200
2201 /* Shortcut for single character strings */
2202 if (PyString_GET_SIZE(self) == 1 &&
2203 isalpha(*p))
2204 return PyInt_FromLong(1);
2205
2206 /* Special case for empty strings */
2207 if (PyString_GET_SIZE(self) == 0)
2208 return PyInt_FromLong(0);
2209
2210 e = p + PyString_GET_SIZE(self);
2211 for (; p < e; p++) {
2212 if (!isalpha(*p))
2213 return PyInt_FromLong(0);
2214 }
2215 return PyInt_FromLong(1);
2216}
2217
2218
2219static char isalnum__doc__[] =
2220"S.isalnum() -> int\n\
2221\n\
2222Return 1 if all characters in S are alphanumeric\n\
2223and there is at least one character in S, 0 otherwise.";
2224
2225static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002226string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002227{
Fred Drakeba096332000-07-09 07:04:36 +00002228 register const unsigned char *p
2229 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002230 register const unsigned char *e;
2231
2232 if (!PyArg_NoArgs(args))
2233 return NULL;
2234
2235 /* Shortcut for single character strings */
2236 if (PyString_GET_SIZE(self) == 1 &&
2237 isalnum(*p))
2238 return PyInt_FromLong(1);
2239
2240 /* Special case for empty strings */
2241 if (PyString_GET_SIZE(self) == 0)
2242 return PyInt_FromLong(0);
2243
2244 e = p + PyString_GET_SIZE(self);
2245 for (; p < e; p++) {
2246 if (!isalnum(*p))
2247 return PyInt_FromLong(0);
2248 }
2249 return PyInt_FromLong(1);
2250}
2251
2252
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253static char isdigit__doc__[] =
2254"S.isdigit() -> int\n\
2255\n\
2256Return 1 if there are only digit characters in S,\n\
22570 otherwise.";
2258
2259static PyObject*
2260string_isdigit(PyStringObject *self, PyObject *args)
2261{
Fred Drakeba096332000-07-09 07:04:36 +00002262 register const unsigned char *p
2263 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002264 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002265
2266 if (!PyArg_NoArgs(args))
2267 return NULL;
2268
2269 /* Shortcut for single character strings */
2270 if (PyString_GET_SIZE(self) == 1 &&
2271 isdigit(*p))
2272 return PyInt_FromLong(1);
2273
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002274 /* Special case for empty strings */
2275 if (PyString_GET_SIZE(self) == 0)
2276 return PyInt_FromLong(0);
2277
Guido van Rossum4c08d552000-03-10 22:55:18 +00002278 e = p + PyString_GET_SIZE(self);
2279 for (; p < e; p++) {
2280 if (!isdigit(*p))
2281 return PyInt_FromLong(0);
2282 }
2283 return PyInt_FromLong(1);
2284}
2285
2286
2287static char islower__doc__[] =
2288"S.islower() -> int\n\
2289\n\
2290Return 1 if all cased characters in S are lowercase and there is\n\
2291at least one cased character in S, 0 otherwise.";
2292
2293static PyObject*
2294string_islower(PyStringObject *self, PyObject *args)
2295{
Fred Drakeba096332000-07-09 07:04:36 +00002296 register const unsigned char *p
2297 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002298 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002299 int cased;
2300
2301 if (!PyArg_NoArgs(args))
2302 return NULL;
2303
2304 /* Shortcut for single character strings */
2305 if (PyString_GET_SIZE(self) == 1)
2306 return PyInt_FromLong(islower(*p) != 0);
2307
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002308 /* Special case for empty strings */
2309 if (PyString_GET_SIZE(self) == 0)
2310 return PyInt_FromLong(0);
2311
Guido van Rossum4c08d552000-03-10 22:55:18 +00002312 e = p + PyString_GET_SIZE(self);
2313 cased = 0;
2314 for (; p < e; p++) {
2315 if (isupper(*p))
2316 return PyInt_FromLong(0);
2317 else if (!cased && islower(*p))
2318 cased = 1;
2319 }
2320 return PyInt_FromLong(cased);
2321}
2322
2323
2324static char isupper__doc__[] =
2325"S.isupper() -> int\n\
2326\n\
2327Return 1 if all cased characters in S are uppercase and there is\n\
2328at least one cased character in S, 0 otherwise.";
2329
2330static PyObject*
2331string_isupper(PyStringObject *self, PyObject *args)
2332{
Fred Drakeba096332000-07-09 07:04:36 +00002333 register const unsigned char *p
2334 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002335 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002336 int cased;
2337
2338 if (!PyArg_NoArgs(args))
2339 return NULL;
2340
2341 /* Shortcut for single character strings */
2342 if (PyString_GET_SIZE(self) == 1)
2343 return PyInt_FromLong(isupper(*p) != 0);
2344
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002345 /* Special case for empty strings */
2346 if (PyString_GET_SIZE(self) == 0)
2347 return PyInt_FromLong(0);
2348
Guido van Rossum4c08d552000-03-10 22:55:18 +00002349 e = p + PyString_GET_SIZE(self);
2350 cased = 0;
2351 for (; p < e; p++) {
2352 if (islower(*p))
2353 return PyInt_FromLong(0);
2354 else if (!cased && isupper(*p))
2355 cased = 1;
2356 }
2357 return PyInt_FromLong(cased);
2358}
2359
2360
2361static char istitle__doc__[] =
2362"S.istitle() -> int\n\
2363\n\
2364Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2365may only follow uncased characters and lowercase characters only cased\n\
2366ones. Return 0 otherwise.";
2367
2368static PyObject*
2369string_istitle(PyStringObject *self, PyObject *args)
2370{
Fred Drakeba096332000-07-09 07:04:36 +00002371 register const unsigned char *p
2372 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002373 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 int cased, previous_is_cased;
2375
2376 if (!PyArg_NoArgs(args))
2377 return NULL;
2378
2379 /* Shortcut for single character strings */
2380 if (PyString_GET_SIZE(self) == 1)
2381 return PyInt_FromLong(isupper(*p) != 0);
2382
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002383 /* Special case for empty strings */
2384 if (PyString_GET_SIZE(self) == 0)
2385 return PyInt_FromLong(0);
2386
Guido van Rossum4c08d552000-03-10 22:55:18 +00002387 e = p + PyString_GET_SIZE(self);
2388 cased = 0;
2389 previous_is_cased = 0;
2390 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002391 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002392
2393 if (isupper(ch)) {
2394 if (previous_is_cased)
2395 return PyInt_FromLong(0);
2396 previous_is_cased = 1;
2397 cased = 1;
2398 }
2399 else if (islower(ch)) {
2400 if (!previous_is_cased)
2401 return PyInt_FromLong(0);
2402 previous_is_cased = 1;
2403 cased = 1;
2404 }
2405 else
2406 previous_is_cased = 0;
2407 }
2408 return PyInt_FromLong(cased);
2409}
2410
2411
2412static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002413"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002414\n\
2415Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002416Line breaks are not included in the resulting list unless keepends\n\
2417is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002418
2419#define SPLIT_APPEND(data, left, right) \
2420 str = PyString_FromStringAndSize(data + left, right - left); \
2421 if (!str) \
2422 goto onError; \
2423 if (PyList_Append(list, str)) { \
2424 Py_DECREF(str); \
2425 goto onError; \
2426 } \
2427 else \
2428 Py_DECREF(str);
2429
2430static PyObject*
2431string_splitlines(PyStringObject *self, PyObject *args)
2432{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002433 register int i;
2434 register int j;
2435 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002436 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002437 PyObject *list;
2438 PyObject *str;
2439 char *data;
2440
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002441 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002442 return NULL;
2443
2444 data = PyString_AS_STRING(self);
2445 len = PyString_GET_SIZE(self);
2446
Guido van Rossum4c08d552000-03-10 22:55:18 +00002447 list = PyList_New(0);
2448 if (!list)
2449 goto onError;
2450
2451 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002452 int eol;
2453
Guido van Rossum4c08d552000-03-10 22:55:18 +00002454 /* Find a line and append it */
2455 while (i < len && data[i] != '\n' && data[i] != '\r')
2456 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002457
2458 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002459 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002460 if (i < len) {
2461 if (data[i] == '\r' && i + 1 < len &&
2462 data[i+1] == '\n')
2463 i += 2;
2464 else
2465 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002466 if (keepends)
2467 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002468 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002469 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002470 j = i;
2471 }
2472 if (j < len) {
2473 SPLIT_APPEND(data, j, len);
2474 }
2475
2476 return list;
2477
2478 onError:
2479 Py_DECREF(list);
2480 return NULL;
2481}
2482
2483#undef SPLIT_APPEND
2484
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002485
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002486static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002487string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002488 /* Counterparts of the obsolete stropmodule functions; except
2489 string.maketrans(). */
2490 {"join", (PyCFunction)string_join, 1, join__doc__},
2491 {"split", (PyCFunction)string_split, 1, split__doc__},
2492 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2493 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2494 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2495 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2496 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2497 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2498 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002499 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2500 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002501 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2502 {"count", (PyCFunction)string_count, 1, count__doc__},
2503 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2504 {"find", (PyCFunction)string_find, 1, find__doc__},
2505 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002506 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002507 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2508 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2509 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2510 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002511 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2512 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2513 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002514 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2515 {"title", (PyCFunction)string_title, 1, title__doc__},
2516 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2517 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2518 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002519 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002520 {"decode", (PyCFunction)string_decode, 1, decode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002521 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2522 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2523#if 0
2524 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2525#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002526 {NULL, NULL} /* sentinel */
2527};
2528
2529static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002530string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002531{
2532 return Py_FindMethod(string_methods, (PyObject*)s, name);
2533}
2534
2535
/* Type object for Python strings.  Entries that are 0 mean the type does
   not supply that slot.  NOTE(review): the labels on the first four
   entries follow the usual PyTypeObject layout -- confirm against
   object.h. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/*ob_size*/
	"string",				/*tp_name*/
	sizeof(PyStringObject),			/*tp_basicsize*/
	sizeof(char),				/*tp_itemsize*/
	(destructor)string_dealloc,		/*tp_dealloc*/
	(printfunc)string_print,		/*tp_print*/
	(getattrfunc)string_getattr,		/*tp_getattr*/
	0,					/*tp_setattr*/
	0,					/*tp_compare*/
	(reprfunc)string_repr,			/*tp_repr*/
	0,					/*tp_as_number*/
	&string_as_sequence,			/*tp_as_sequence*/
	0,					/*tp_as_mapping*/
	(hashfunc)string_hash,			/*tp_hash*/
	0,					/*tp_call*/
	(reprfunc)string_str,			/*tp_str*/
	0,					/*tp_getattro*/
	0,					/*tp_setattro*/
	&string_as_buffer,			/*tp_as_buffer*/
	Py_TPFLAGS_DEFAULT,			/*tp_flags*/
	0,					/*tp_doc*/
	0,					/*tp_traverse*/
	0,					/*tp_clear*/
	(richcmpfunc)string_richcompare,	/*tp_richcompare*/
	0,					/*tp_weaklistoffset*/
	0,					/*tp_iter*/
	0,					/*tp_iternext*/
};
2566
2567void
Fred Drakeba096332000-07-09 07:04:36 +00002568PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002569{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002570 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002571 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002572 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002573 if (w == NULL || !PyString_Check(*pv)) {
2574 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002575 *pv = NULL;
2576 return;
2577 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002578 v = string_concat((PyStringObject *) *pv, w);
2579 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002580 *pv = v;
2581}
2582
Guido van Rossum013142a1994-08-30 08:19:36 +00002583void
Fred Drakeba096332000-07-09 07:04:36 +00002584PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002585{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002586 PyString_Concat(pv, w);
2587 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002588}
2589
2590
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002591/* The following function breaks the notion that strings are immutable:
2592 it changes the size of a string. We get away with this only if there
2593 is only one module referencing the object. You can also think of it
2594 as creating a new string object and destroying the old one, only
2595 more efficiently. In any case, don't use this if the string may
2596 already be known to some other part of the code... */
2597
2598int
Fred Drakeba096332000-07-09 07:04:36 +00002599_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002600{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002601 register PyObject *v;
2602 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002603 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002604 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002605 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002606 Py_DECREF(v);
2607 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002608 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002609 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002610 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002611#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002612 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002613#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002614 _Py_ForgetReference(v);
2615 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002616 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002617 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002618 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002619 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002620 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002621 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002622 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002623 _Py_NewReference(*pv);
2624 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002625 sv->ob_size = newsize;
2626 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002627 return 0;
2628}
Guido van Rossume5372401993-03-16 12:15:04 +00002629
2630/* Helpers for formatstring */
2631
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002632static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002633getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002634{
2635 int argidx = *p_argidx;
2636 if (argidx < arglen) {
2637 (*p_argidx)++;
2638 if (arglen < 0)
2639 return args;
2640 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002641 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002642 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002643 PyErr_SetString(PyExc_TypeError,
2644 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002645 return NULL;
2646}
2647
/* Format codes
 * One bit per flag character of a conversion specification.
 * PyString_Format() ORs the matching bit into its `flags` word for each
 * flag character it reads; the format helpers test them with `&`.
 * F_LJUST	'-'
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
2660
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002661static int
Fred Drakeba096332000-07-09 07:04:36 +00002662formatfloat(char *buf, size_t buflen, int flags,
2663 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002664{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002665 /* fmt = '%#.' + `prec` + `type`
2666 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002667 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002668 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002669 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002670 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002671 if (prec < 0)
2672 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002673 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2674 type = 'g';
2675 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002676 /* worst case length calc to ensure no buffer overrun:
2677 fmt = %#.<prec>g
2678 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002679 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002680 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2681 If prec=0 the effective precision is 1 (the leading digit is
2682 always given), therefore increase by one to 10+prec. */
2683 if (buflen <= (size_t)10 + (size_t)prec) {
2684 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002685 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002686 return -1;
2687 }
Guido van Rossume5372401993-03-16 12:15:04 +00002688 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002689 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002690}
2691
Tim Peters38fd5b62000-09-21 05:43:11 +00002692/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2693 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2694 * Python's regular ints.
2695 * Return value: a new PyString*, or NULL if error.
2696 * . *pbuf is set to point into it,
2697 * *plen set to the # of chars following that.
2698 * Caller must decref it when done using pbuf.
2699 * The string starting at *pbuf is of the form
2700 * "-"? ("0x" | "0X")? digit+
2701 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002702 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002703 * There will be at least prec digits, zero-filled on the left if
2704 * necessary to get that many.
2705 * val object to be converted
2706 * flags bitmask of format flags; only F_ALT is looked at
2707 * prec minimum number of digits; 0-fill on left if needed
2708 * type a character in [duoxX]; u acts the same as d
2709 *
2710 * CAUTION: o, x and X conversions on regular ints can never
2711 * produce a '-' sign, but can for Python's unbounded ints.
2712 */
2713PyObject*
2714_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2715 char **pbuf, int *plen)
2716{
2717 PyObject *result = NULL;
2718 char *buf;
2719 int i;
2720 int sign; /* 1 if '-', else 0 */
2721 int len; /* number of characters */
2722 int numdigits; /* len == numnondigits + numdigits */
2723 int numnondigits = 0;
2724
2725 switch (type) {
2726 case 'd':
2727 case 'u':
2728 result = val->ob_type->tp_str(val);
2729 break;
2730 case 'o':
2731 result = val->ob_type->tp_as_number->nb_oct(val);
2732 break;
2733 case 'x':
2734 case 'X':
2735 numnondigits = 2;
2736 result = val->ob_type->tp_as_number->nb_hex(val);
2737 break;
2738 default:
2739 assert(!"'type' not in [duoxX]");
2740 }
2741 if (!result)
2742 return NULL;
2743
2744 /* To modify the string in-place, there can only be one reference. */
2745 if (result->ob_refcnt != 1) {
2746 PyErr_BadInternalCall();
2747 return NULL;
2748 }
2749 buf = PyString_AsString(result);
2750 len = PyString_Size(result);
2751 if (buf[len-1] == 'L') {
2752 --len;
2753 buf[len] = '\0';
2754 }
2755 sign = buf[0] == '-';
2756 numnondigits += sign;
2757 numdigits = len - numnondigits;
2758 assert(numdigits > 0);
2759
Tim Petersfff53252001-04-12 18:38:48 +00002760 /* Get rid of base marker unless F_ALT */
2761 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002762 /* Need to skip 0x, 0X or 0. */
2763 int skipped = 0;
2764 switch (type) {
2765 case 'o':
2766 assert(buf[sign] == '0');
2767 /* If 0 is only digit, leave it alone. */
2768 if (numdigits > 1) {
2769 skipped = 1;
2770 --numdigits;
2771 }
2772 break;
2773 case 'x':
2774 case 'X':
2775 assert(buf[sign] == '0');
2776 assert(buf[sign + 1] == 'x');
2777 skipped = 2;
2778 numnondigits -= 2;
2779 break;
2780 }
2781 if (skipped) {
2782 buf += skipped;
2783 len -= skipped;
2784 if (sign)
2785 buf[0] = '-';
2786 }
2787 assert(len == numnondigits + numdigits);
2788 assert(numdigits > 0);
2789 }
2790
2791 /* Fill with leading zeroes to meet minimum width. */
2792 if (prec > numdigits) {
2793 PyObject *r1 = PyString_FromStringAndSize(NULL,
2794 numnondigits + prec);
2795 char *b1;
2796 if (!r1) {
2797 Py_DECREF(result);
2798 return NULL;
2799 }
2800 b1 = PyString_AS_STRING(r1);
2801 for (i = 0; i < numnondigits; ++i)
2802 *b1++ = *buf++;
2803 for (i = 0; i < prec - numdigits; i++)
2804 *b1++ = '0';
2805 for (i = 0; i < numdigits; i++)
2806 *b1++ = *buf++;
2807 *b1 = '\0';
2808 Py_DECREF(result);
2809 result = r1;
2810 buf = PyString_AS_STRING(result);
2811 len = numnondigits + prec;
2812 }
2813
2814 /* Fix up case for hex conversions. */
2815 switch (type) {
2816 case 'x':
2817 /* Need to convert all upper case letters to lower case. */
2818 for (i = 0; i < len; i++)
2819 if (buf[i] >= 'A' && buf[i] <= 'F')
2820 buf[i] += 'a'-'A';
2821 break;
2822 case 'X':
2823 /* Need to convert 0x to 0X (and -0x to -0X). */
2824 if (buf[sign + 1] == 'x')
2825 buf[sign + 1] = 'X';
2826 break;
2827 }
2828 *pbuf = buf;
2829 *plen = len;
2830 return result;
2831}
2832
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002833static int
Fred Drakeba096332000-07-09 07:04:36 +00002834formatint(char *buf, size_t buflen, int flags,
2835 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002836{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002837 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002838 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2839 + 1 + 1 = 24 */
2840 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002841 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002842 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002843 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002844 if (prec < 0)
2845 prec = 1;
2846 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002847 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002848 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002849 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002850 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002851 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002852 return -1;
2853 }
Guido van Rossume5372401993-03-16 12:15:04 +00002854 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002855 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2856 * but we want it (for consistency with other %#x conversions, and
2857 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002858 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2859 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2860 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002861 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002862 if (x == 0 &&
2863 (flags & F_ALT) &&
2864 (type == 'x' || type == 'X') &&
2865 buf[1] != (char)type) /* this last always true under std C */
2866 {
Tim Petersfff53252001-04-12 18:38:48 +00002867 memmove(buf+2, buf, strlen(buf) + 1);
2868 buf[0] = '0';
2869 buf[1] = (char)type;
2870 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002871 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002872}
2873
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002874static int
Fred Drakeba096332000-07-09 07:04:36 +00002875formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002876{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002877 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002878 if (PyString_Check(v)) {
2879 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002880 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002881 }
2882 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002883 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002884 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002885 }
2886 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002887 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002888}
2889
Guido van Rossum013142a1994-08-30 08:19:36 +00002890
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is wrapped in parentheses so that the macro behaves as
   a single operand wherever it is used (e.g. `sizeof FORMATBUFLEN`).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002900
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002901PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002902PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002903{
2904 char *fmt, *res;
2905 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002906 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002907 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002908 PyObject *dict = NULL;
2909 if (format == NULL || !PyString_Check(format) || args == NULL) {
2910 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002911 return NULL;
2912 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002913 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002914 fmt = PyString_AsString(format);
2915 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002916 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002917 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002918 if (result == NULL)
2919 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002920 res = PyString_AsString(result);
2921 if (PyTuple_Check(args)) {
2922 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002923 argidx = 0;
2924 }
2925 else {
2926 arglen = -1;
2927 argidx = -2;
2928 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002929 if (args->ob_type->tp_as_mapping)
2930 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002931 while (--fmtcnt >= 0) {
2932 if (*fmt != '%') {
2933 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002934 rescnt = fmtcnt + 100;
2935 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002936 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002937 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002938 res = PyString_AsString(result)
2939 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002940 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002941 }
2942 *res++ = *fmt++;
2943 }
2944 else {
2945 /* Got a format specifier */
2946 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002947 int width = -1;
2948 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00002949 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002950 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002951 PyObject *v = NULL;
2952 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002953 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002954 int sign;
2955 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002956 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002957 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002958 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002959
Guido van Rossumda9c2711996-12-05 21:58:58 +00002960 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002961 if (*fmt == '(') {
2962 char *keystart;
2963 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002964 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002965 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002966
2967 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002968 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002969 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002970 goto error;
2971 }
2972 ++fmt;
2973 --fmtcnt;
2974 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002975 /* Skip over balanced parentheses */
2976 while (pcount > 0 && --fmtcnt >= 0) {
2977 if (*fmt == ')')
2978 --pcount;
2979 else if (*fmt == '(')
2980 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002981 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002982 }
2983 keylen = fmt - keystart - 1;
2984 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002985 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002986 "incomplete format key");
2987 goto error;
2988 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002989 key = PyString_FromStringAndSize(keystart,
2990 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002991 if (key == NULL)
2992 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002993 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002994 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002995 args_owned = 0;
2996 }
2997 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002998 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002999 if (args == NULL) {
3000 goto error;
3001 }
Guido van Rossum993952b1996-05-21 22:44:20 +00003002 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00003003 arglen = -1;
3004 argidx = -2;
3005 }
Guido van Rossume5372401993-03-16 12:15:04 +00003006 while (--fmtcnt >= 0) {
3007 switch (c = *fmt++) {
3008 case '-': flags |= F_LJUST; continue;
3009 case '+': flags |= F_SIGN; continue;
3010 case ' ': flags |= F_BLANK; continue;
3011 case '#': flags |= F_ALT; continue;
3012 case '0': flags |= F_ZERO; continue;
3013 }
3014 break;
3015 }
3016 if (c == '*') {
3017 v = getnextarg(args, arglen, &argidx);
3018 if (v == NULL)
3019 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003020 if (!PyInt_Check(v)) {
3021 PyErr_SetString(PyExc_TypeError,
3022 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003023 goto error;
3024 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003025 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00003026 if (width < 0) {
3027 flags |= F_LJUST;
3028 width = -width;
3029 }
Guido van Rossume5372401993-03-16 12:15:04 +00003030 if (--fmtcnt >= 0)
3031 c = *fmt++;
3032 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003033 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003034 width = c - '0';
3035 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003036 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003037 if (!isdigit(c))
3038 break;
3039 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003040 PyErr_SetString(
3041 PyExc_ValueError,
3042 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00003043 goto error;
3044 }
3045 width = width*10 + (c - '0');
3046 }
3047 }
3048 if (c == '.') {
3049 prec = 0;
3050 if (--fmtcnt >= 0)
3051 c = *fmt++;
3052 if (c == '*') {
3053 v = getnextarg(args, arglen, &argidx);
3054 if (v == NULL)
3055 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003056 if (!PyInt_Check(v)) {
3057 PyErr_SetString(
3058 PyExc_TypeError,
3059 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00003060 goto error;
3061 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003062 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00003063 if (prec < 0)
3064 prec = 0;
3065 if (--fmtcnt >= 0)
3066 c = *fmt++;
3067 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003068 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003069 prec = c - '0';
3070 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003071 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00003072 if (!isdigit(c))
3073 break;
3074 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003075 PyErr_SetString(
3076 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00003077 "prec too big");
3078 goto error;
3079 }
3080 prec = prec*10 + (c - '0');
3081 }
3082 }
3083 } /* prec */
3084 if (fmtcnt >= 0) {
3085 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003086 if (--fmtcnt >= 0)
3087 c = *fmt++;
3088 }
3089 }
3090 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003091 PyErr_SetString(PyExc_ValueError,
3092 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003093 goto error;
3094 }
3095 if (c != '%') {
3096 v = getnextarg(args, arglen, &argidx);
3097 if (v == NULL)
3098 goto error;
3099 }
3100 sign = 0;
3101 fill = ' ';
3102 switch (c) {
3103 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003104 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003105 len = 1;
3106 break;
3107 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003108 case 'r':
3109 if (PyUnicode_Check(v)) {
3110 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003111 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003112 goto unicode;
3113 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003114 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003115 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003116 else
3117 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003118 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003119 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003120 if (!PyString_Check(temp)) {
3121 PyErr_SetString(PyExc_TypeError,
3122 "%s argument has non-string str()");
3123 goto error;
3124 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003125 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003126 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003127 if (prec >= 0 && len > prec)
3128 len = prec;
3129 break;
3130 case 'i':
3131 case 'd':
3132 case 'u':
3133 case 'o':
3134 case 'x':
3135 case 'X':
3136 if (c == 'i')
3137 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003138 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003139 temp = _PyString_FormatLong(v, flags,
3140 prec, c, &pbuf, &len);
3141 if (!temp)
3142 goto error;
3143 /* unbounded ints can always produce
3144 a sign character! */
3145 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003146 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003147 else {
3148 pbuf = formatbuf;
3149 len = formatint(pbuf, sizeof(formatbuf),
3150 flags, prec, c, v);
3151 if (len < 0)
3152 goto error;
3153 /* only d conversion is signed */
3154 sign = c == 'd';
3155 }
3156 if (flags & F_ZERO)
3157 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003158 break;
3159 case 'e':
3160 case 'E':
3161 case 'f':
3162 case 'g':
3163 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003164 pbuf = formatbuf;
3165 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003166 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003167 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003168 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003169 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003170 fill = '0';
3171 break;
3172 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003173 pbuf = formatbuf;
3174 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003175 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003176 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003177 break;
3178 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003179 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003180 "unsupported format character '%c' (0x%x) "
3181 "at index %i",
3182 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003183 goto error;
3184 }
3185 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003186 if (*pbuf == '-' || *pbuf == '+') {
3187 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003188 len--;
3189 }
3190 else if (flags & F_SIGN)
3191 sign = '+';
3192 else if (flags & F_BLANK)
3193 sign = ' ';
3194 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003195 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003196 }
3197 if (width < len)
3198 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003199 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003200 reslen -= rescnt;
3201 rescnt = width + fmtcnt + 100;
3202 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003203 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003204 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003205 res = PyString_AsString(result)
3206 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003207 }
3208 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003209 if (fill != ' ')
3210 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003211 rescnt--;
3212 if (width > len)
3213 width--;
3214 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003215 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3216 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003217 assert(pbuf[1] == c);
3218 if (fill != ' ') {
3219 *res++ = *pbuf++;
3220 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003221 }
Tim Petersfff53252001-04-12 18:38:48 +00003222 rescnt -= 2;
3223 width -= 2;
3224 if (width < 0)
3225 width = 0;
3226 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003227 }
3228 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003229 do {
3230 --rescnt;
3231 *res++ = fill;
3232 } while (--width > len);
3233 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003234 if (fill == ' ') {
3235 if (sign)
3236 *res++ = sign;
3237 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003238 (c == 'x' || c == 'X')) {
3239 assert(pbuf[0] == '0');
3240 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003241 *res++ = *pbuf++;
3242 *res++ = *pbuf++;
3243 }
3244 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003245 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003246 res += len;
3247 rescnt -= len;
3248 while (--width >= len) {
3249 --rescnt;
3250 *res++ = ' ';
3251 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003252 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003253 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003254 "not all arguments converted");
3255 goto error;
3256 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003257 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003258 } /* '%' */
3259 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003260 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003261 PyErr_SetString(PyExc_TypeError,
3262 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003263 goto error;
3264 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003265 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003266 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003267 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003268 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003269 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003270
3271 unicode:
3272 if (args_owned) {
3273 Py_DECREF(args);
3274 args_owned = 0;
3275 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003276 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003277 if (PyTuple_Check(orig_args) && argidx > 0) {
3278 PyObject *v;
3279 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3280 v = PyTuple_New(n);
3281 if (v == NULL)
3282 goto error;
3283 while (--n >= 0) {
3284 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3285 Py_INCREF(w);
3286 PyTuple_SET_ITEM(v, n, w);
3287 }
3288 args = v;
3289 } else {
3290 Py_INCREF(orig_args);
3291 args = orig_args;
3292 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003293 args_owned = 1;
3294 /* Take what we have of the result and let the Unicode formatting
3295 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003296 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003297 if (_PyString_Resize(&result, rescnt))
3298 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003299 fmtcnt = PyString_GET_SIZE(format) - \
3300 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003301 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3302 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003303 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003304 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003305 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003306 if (v == NULL)
3307 goto error;
3308 /* Paste what we have (result) to what the Unicode formatting
3309 function returned (v) and return the result (or error) */
3310 w = PyUnicode_Concat(result, v);
3311 Py_DECREF(result);
3312 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003313 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003314 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003315
Guido van Rossume5372401993-03-16 12:15:04 +00003316 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003317 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003318 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003319 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003320 }
Guido van Rossume5372401993-03-16 12:15:04 +00003321 return NULL;
3322}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003323
3324
3325#ifdef INTERN_STRINGS
3326
Barry Warsaw4df762f2000-08-16 23:41:01 +00003327/* This dictionary will leak at PyString_Fini() time. That's acceptable
3328 * because PyString_Fini() specifically frees interned strings that are
3329 * only referenced by this dictionary. The CVS log entry for revision 2.45
3330 * says:
3331 *
3332 * Change the Fini function to only remove otherwise unreferenced
3333 * strings from the interned table. There are references in
3334 * hard-to-find static variables all over the interpreter, and it's not
3335 * worth trying to get rid of all those; but "uninterning" isn't fair
3336 * either and may cause subtle failures later -- so we have to keep them
3337 * in the interned table.
3338 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003339static PyObject *interned;
3340
3341void
Fred Drakeba096332000-07-09 07:04:36 +00003342PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003343{
3344 register PyStringObject *s = (PyStringObject *)(*p);
3345 PyObject *t;
3346 if (s == NULL || !PyString_Check(s))
3347 Py_FatalError("PyString_InternInPlace: strings only please!");
3348 if ((t = s->ob_sinterned) != NULL) {
3349 if (t == (PyObject *)s)
3350 return;
3351 Py_INCREF(t);
3352 *p = t;
3353 Py_DECREF(s);
3354 return;
3355 }
3356 if (interned == NULL) {
3357 interned = PyDict_New();
3358 if (interned == NULL)
3359 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003360 }
3361 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3362 Py_INCREF(t);
3363 *p = s->ob_sinterned = t;
3364 Py_DECREF(s);
3365 return;
3366 }
3367 t = (PyObject *)s;
3368 if (PyDict_SetItem(interned, t, t) == 0) {
3369 s->ob_sinterned = t;
3370 return;
3371 }
3372 PyErr_Clear();
3373}
3374
3375
3376PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003377PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003378{
3379 PyObject *s = PyString_FromString(cp);
3380 if (s == NULL)
3381 return NULL;
3382 PyString_InternInPlace(&s);
3383 return s;
3384}
3385
3386#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003387
3388void
Fred Drakeba096332000-07-09 07:04:36 +00003389PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003390{
3391 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003392 for (i = 0; i < UCHAR_MAX + 1; i++) {
3393 Py_XDECREF(characters[i]);
3394 characters[i] = NULL;
3395 }
3396#ifndef DONT_SHARE_SHORT_STRINGS
3397 Py_XDECREF(nullstring);
3398 nullstring = NULL;
3399#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003400#ifdef INTERN_STRINGS
3401 if (interned) {
3402 int pos, changed;
3403 PyObject *key, *value;
3404 do {
3405 changed = 0;
3406 pos = 0;
3407 while (PyDict_Next(interned, &pos, &key, &value)) {
3408 if (key->ob_refcnt == 2 && key == value) {
3409 PyDict_DelItem(interned, key);
3410 changed = 1;
3411 }
3412 }
3413 } while (changed);
3414 }
3415#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003416}
Barry Warsawa903ad982001-02-23 16:40:48 +00003417
3418#ifdef INTERN_STRINGS
3419void _Py_ReleaseInternedStrings(void)
3420{
3421 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003422 fprintf(stderr, "releasing interned strings\n");
3423 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003424 Py_DECREF(interned);
3425 interned = NULL;
3426 }
3427}
3428#endif /* INTERN_STRINGS */