blob: cf6421a024b0cabc43689684aa1f18faa28d9ca8 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() (formerly
   newsizedstringobject() and newstringobject()) try in certain cases
   to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first
   argument is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000150PyObject *PyString_Decode(const char *s,
151 int size,
152 const char *encoding,
153 const char *errors)
154{
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000155 PyObject *v, *str;
156
157 str = PyString_FromStringAndSize(s, size);
158 if (str == NULL)
159 return NULL;
160 v = PyString_AsDecodedString(str, encoding, errors);
161 Py_DECREF(str);
162 return v;
163}
164
165PyObject *PyString_AsDecodedObject(PyObject *str,
166 const char *encoding,
167 const char *errors)
168{
169 PyObject *v;
170
171 if (!PyString_Check(str)) {
172 PyErr_BadArgument();
173 goto onError;
174 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000175
176 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000177 encoding = PyUnicode_GetDefaultEncoding();
178
179 /* Decode via the codec registry */
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000180 v = PyCodec_Decode(str, encoding, errors);
181 if (v == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000182 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000183
184 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000185
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000186 onError:
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000187 return NULL;
188}
189
190PyObject *PyString_AsDecodedString(PyObject *str,
191 const char *encoding,
192 const char *errors)
193{
194 PyObject *v;
195
196 v = PyString_AsDecodedObject(str, encoding, errors);
197 if (v == NULL)
198 goto onError;
199
200 /* Convert Unicode to a string using the default encoding */
201 if (PyUnicode_Check(v)) {
202 PyObject *temp = v;
203 v = PyUnicode_AsEncodedString(v, NULL, NULL);
204 Py_DECREF(temp);
205 if (v == NULL)
206 goto onError;
207 }
208 if (!PyString_Check(v)) {
209 PyErr_Format(PyExc_TypeError,
210 "decoder did not return a string object (type=%.400s)",
211 v->ob_type->tp_name);
212 Py_DECREF(v);
213 goto onError;
214 }
215
216 return v;
217
218 onError:
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000219 return NULL;
220}
221
222PyObject *PyString_Encode(const char *s,
223 int size,
224 const char *encoding,
225 const char *errors)
226{
227 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000228
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000229 str = PyString_FromStringAndSize(s, size);
230 if (str == NULL)
231 return NULL;
232 v = PyString_AsEncodedString(str, encoding, errors);
233 Py_DECREF(str);
234 return v;
235}
236
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000237PyObject *PyString_AsEncodedObject(PyObject *str,
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000238 const char *encoding,
239 const char *errors)
240{
241 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000242
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000243 if (!PyString_Check(str)) {
244 PyErr_BadArgument();
245 goto onError;
246 }
247
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000248 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000249 encoding = PyUnicode_GetDefaultEncoding();
250
251 /* Encode via the codec registry */
252 v = PyCodec_Encode(str, encoding, errors);
253 if (v == NULL)
254 goto onError;
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000255
256 return v;
257
258 onError:
259 return NULL;
260}
261
262PyObject *PyString_AsEncodedString(PyObject *str,
263 const char *encoding,
264 const char *errors)
265{
266 PyObject *v;
267
268 v = PyString_AsEncodedString(str, encoding, errors);
269 if (v == NULL)
270 goto onError;
271
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000272 /* Convert Unicode to a string using the default encoding */
273 if (PyUnicode_Check(v)) {
274 PyObject *temp = v;
275 v = PyUnicode_AsEncodedString(v, NULL, NULL);
276 Py_DECREF(temp);
277 if (v == NULL)
278 goto onError;
279 }
280 if (!PyString_Check(v)) {
281 PyErr_Format(PyExc_TypeError,
282 "encoder did not return a string object (type=%.400s)",
283 v->ob_type->tp_name);
284 Py_DECREF(v);
285 goto onError;
286 }
Marc-André Lemburg2d920412001-05-15 12:00:02 +0000287
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000288 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000289
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000290 onError:
291 return NULL;
292}
293
Guido van Rossum234f9421993-06-17 12:35:49 +0000294static void
Fred Drakeba096332000-07-09 07:04:36 +0000295string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000296{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000297 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000298}
299
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000300static int
301string_getsize(register PyObject *op)
302{
303 char *s;
304 int len;
305 if (PyString_AsStringAndSize(op, &s, &len))
306 return -1;
307 return len;
308}
309
310static /*const*/ char *
311string_getbuffer(register PyObject *op)
312{
313 char *s;
314 int len;
315 if (PyString_AsStringAndSize(op, &s, &len))
316 return NULL;
317 return s;
318}
319
Guido van Rossumd7047b31995-01-02 19:07:15 +0000320int
Fred Drakeba096332000-07-09 07:04:36 +0000321PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000322{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000323 if (!PyString_Check(op))
324 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000325 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326}
327
328/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000329PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000331 if (!PyString_Check(op))
332 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000333 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334}
335
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000336/* Internal API needed by PyString_AsStringAndSize(): */
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000337extern
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000338PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
339 const char *errors);
340
341int
342PyString_AsStringAndSize(register PyObject *obj,
343 register char **s,
344 register int *len)
345{
346 if (s == NULL) {
347 PyErr_BadInternalCall();
348 return -1;
349 }
350
351 if (!PyString_Check(obj)) {
352 if (PyUnicode_Check(obj)) {
353 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
354 if (obj == NULL)
355 return -1;
356 }
357 else {
358 PyErr_Format(PyExc_TypeError,
359 "expected string or Unicode object, "
360 "%.200s found", obj->ob_type->tp_name);
361 return -1;
362 }
363 }
364
365 *s = PyString_AS_STRING(obj);
366 if (len != NULL)
367 *len = PyString_GET_SIZE(obj);
368 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
369 PyErr_SetString(PyExc_TypeError,
370 "expected string without null bytes");
371 return -1;
372 }
373 return 0;
374}
375
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000376/* Methods */
377
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000378static int
Fred Drakeba096332000-07-09 07:04:36 +0000379string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000380{
381 int i;
382 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000383 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000384 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000385 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000386 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000387 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000388 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000389
Thomas Wouters7e474022000-07-16 12:04:32 +0000390 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000391 quote = '\'';
392 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
393 quote = '"';
394
395 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000396 for (i = 0; i < op->ob_size; i++) {
397 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000398 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000399 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000400 else if (c == '\t')
401 fprintf(fp, "\\t");
402 else if (c == '\n')
403 fprintf(fp, "\\n");
404 else if (c == '\r')
405 fprintf(fp, "\\r");
406 else if (c < ' ' || c >= 0x7f)
407 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000408 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000409 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000410 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000411 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000412 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413}
414
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000415static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000416string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000417{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000418 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
419 PyObject *v;
420 if (newsize > INT_MAX) {
421 PyErr_SetString(PyExc_OverflowError,
422 "string is too large to make repr");
423 }
424 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000425 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000426 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000427 }
428 else {
429 register int i;
430 register char c;
431 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000432 int quote;
433
Thomas Wouters7e474022000-07-16 12:04:32 +0000434 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000435 quote = '\'';
436 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
437 quote = '"';
438
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000439 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000440 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000441 for (i = 0; i < op->ob_size; i++) {
442 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000443 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000444 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000445 else if (c == '\t')
446 *p++ = '\\', *p++ = 't';
447 else if (c == '\n')
448 *p++ = '\\', *p++ = 'n';
449 else if (c == '\r')
450 *p++ = '\\', *p++ = 'r';
451 else if (c < ' ' || c >= 0x7f) {
452 sprintf(p, "\\x%02x", c & 0xff);
453 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000454 }
455 else
456 *p++ = c;
457 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000458 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000459 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000460 _PyString_Resize(
461 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000462 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000463 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000464}
465
Guido van Rossum189f1df2001-05-01 16:51:53 +0000466static PyObject *
467string_str(PyObject *s)
468{
469 Py_INCREF(s);
470 return s;
471}
472
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000473static int
Fred Drakeba096332000-07-09 07:04:36 +0000474string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000475{
476 return a->ob_size;
477}
478
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000479static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000480string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481{
482 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000483 register PyStringObject *op;
484 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000485 if (PyUnicode_Check(bb))
486 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000487 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000488 "cannot add type \"%.200s\" to string",
489 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000490 return NULL;
491 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000492#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493 /* Optimize cases with empty left or right operand */
494 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000495 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000496 return bb;
497 }
498 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000499 Py_INCREF(a);
500 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000501 }
502 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000503 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000504 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000505 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000506 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000507 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000508 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000509#ifdef CACHE_HASH
510 op->ob_shash = -1;
511#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000512#ifdef INTERN_STRINGS
513 op->ob_sinterned = NULL;
514#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000515 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
516 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
517 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000518 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000519#undef b
520}
521
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000522static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000523string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000524{
525 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000526 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000527 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000528 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000529 if (n < 0)
530 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000531 /* watch out for overflows: the size can overflow int,
532 * and the # of bytes needed can overflow size_t
533 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000534 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000535 if (n && size / n != a->ob_size) {
536 PyErr_SetString(PyExc_OverflowError,
537 "repeated string is too long");
538 return NULL;
539 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000540 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000541 Py_INCREF(a);
542 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000543 }
Tim Peters8f422462000-09-09 06:13:41 +0000544 nbytes = size * sizeof(char);
545 if (nbytes / sizeof(char) != (size_t)size ||
546 nbytes + sizeof(PyStringObject) <= nbytes) {
547 PyErr_SetString(PyExc_OverflowError,
548 "repeated string is too long");
549 return NULL;
550 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000551 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000552 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000553 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000554 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000555 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000556#ifdef CACHE_HASH
557 op->ob_shash = -1;
558#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000559#ifdef INTERN_STRINGS
560 op->ob_sinterned = NULL;
561#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000562 for (i = 0; i < size; i += a->ob_size)
563 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
564 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566}
567
568/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
569
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000570static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000571string_slice(register PyStringObject *a, register int i, register int j)
572 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573{
574 if (i < 0)
575 i = 0;
576 if (j < 0)
577 j = 0; /* Avoid signed/unsigned bug in next line */
578 if (j > a->ob_size)
579 j = a->ob_size;
580 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000581 Py_INCREF(a);
582 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000583 }
584 if (j < i)
585 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000586 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000587}
588
Guido van Rossum9284a572000-03-07 15:53:43 +0000589static int
Fred Drakeba096332000-07-09 07:04:36 +0000590string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000591{
592 register char *s, *end;
593 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000594 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000595 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000596 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000597 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000598 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000599 return -1;
600 }
601 c = PyString_AsString(el)[0];
602 s = PyString_AsString(a);
603 end = s + PyString_Size(a);
604 while (s < end) {
605 if (c == *s++)
606 return 1;
607 }
608 return 0;
609}
610
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000611static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000612string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000613{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000614 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000615 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000616 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000617 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000618 return NULL;
619 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000620 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000621 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000622 if (v == NULL)
623 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000624 else {
625#ifdef COUNT_ALLOCS
626 one_strings++;
627#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000628 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000629 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000630 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000631}
632
633static int
Fred Drakeba096332000-07-09 07:04:36 +0000634string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000635{
Guido van Rossum253919f1991-02-13 23:18:39 +0000636 int len_a = a->ob_size, len_b = b->ob_size;
637 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000638 int cmp;
639 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000640 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000641 if (cmp == 0)
642 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
643 if (cmp != 0)
644 return cmp;
645 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000646 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000647}
648
Guido van Rossum9bfef441993-03-29 10:43:31 +0000649static long
Fred Drakeba096332000-07-09 07:04:36 +0000650string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000651{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000652 register int len;
653 register unsigned char *p;
654 register long x;
655
656#ifdef CACHE_HASH
657 if (a->ob_shash != -1)
658 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000659#ifdef INTERN_STRINGS
660 if (a->ob_sinterned != NULL)
661 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000662 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000663#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000664#endif
665 len = a->ob_size;
666 p = (unsigned char *) a->ob_sval;
667 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000668 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000669 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000670 x ^= a->ob_size;
671 if (x == -1)
672 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000673#ifdef CACHE_HASH
674 a->ob_shash = x;
675#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000676 return x;
677}
678
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000679static int
Fred Drakeba096332000-07-09 07:04:36 +0000680string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000681{
682 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000683 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000684 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000685 return -1;
686 }
687 *ptr = (void *)self->ob_sval;
688 return self->ob_size;
689}
690
691static int
Fred Drakeba096332000-07-09 07:04:36 +0000692string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000693{
Guido van Rossum045e6881997-09-08 18:30:11 +0000694 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000695 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000696 return -1;
697}
698
699static int
Fred Drakeba096332000-07-09 07:04:36 +0000700string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000701{
702 if ( lenp )
703 *lenp = self->ob_size;
704 return 1;
705}
706
Guido van Rossum1db70701998-10-08 02:18:52 +0000707static int
Fred Drakeba096332000-07-09 07:04:36 +0000708string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000709{
710 if ( index != 0 ) {
711 PyErr_SetString(PyExc_SystemError,
712 "accessing non-existent string segment");
713 return -1;
714 }
715 *ptr = self->ob_sval;
716 return self->ob_size;
717}
718
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000719static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000720 (inquiry)string_length, /*sq_length*/
721 (binaryfunc)string_concat, /*sq_concat*/
722 (intargfunc)string_repeat, /*sq_repeat*/
723 (intargfunc)string_item, /*sq_item*/
724 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000725 0, /*sq_ass_item*/
726 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000727 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000728};
729
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000730static PyBufferProcs string_as_buffer = {
731 (getreadbufferproc)string_buffer_getreadbuf,
732 (getwritebufferproc)string_buffer_getwritebuf,
733 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000734 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000735};
736
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000737
738
/* Mode selectors for do_strip(): which end(s) of the string to trim. */
#define LEFTSTRIP 0     /* trim leading whitespace only */
#define RIGHTSTRIP 1    /* trim trailing whitespace only */
#define BOTHSTRIP 2     /* trim both ends */
742
743
744static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000745split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000746{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000747 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000748 PyObject* item;
749 PyObject *list = PyList_New(0);
750
751 if (list == NULL)
752 return NULL;
753
Guido van Rossum4c08d552000-03-10 22:55:18 +0000754 for (i = j = 0; i < len; ) {
755 while (i < len && isspace(Py_CHARMASK(s[i])))
756 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000757 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000758 while (i < len && !isspace(Py_CHARMASK(s[i])))
759 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000760 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000761 if (maxsplit-- <= 0)
762 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000763 item = PyString_FromStringAndSize(s+j, (int)(i-j));
764 if (item == NULL)
765 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000766 err = PyList_Append(list, item);
767 Py_DECREF(item);
768 if (err < 0)
769 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000770 while (i < len && isspace(Py_CHARMASK(s[i])))
771 i++;
772 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000773 }
774 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000775 if (j < len) {
776 item = PyString_FromStringAndSize(s+j, (int)(len - j));
777 if (item == NULL)
778 goto finally;
779 err = PyList_Append(list, item);
780 Py_DECREF(item);
781 if (err < 0)
782 goto finally;
783 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000784 return list;
785 finally:
786 Py_DECREF(list);
787 return NULL;
788}
789
790
791static char split__doc__[] =
792"S.split([sep [,maxsplit]]) -> list of strings\n\
793\n\
794Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000795delimiter string. If maxsplit is given, at most maxsplit\n\
796splits are done. If sep is not specified, any whitespace string\n\
797is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000798
799static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000800string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000801{
802 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000803 int maxsplit = -1;
804 const char *s = PyString_AS_STRING(self), *sub;
805 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000806
Guido van Rossum4c08d552000-03-10 22:55:18 +0000807 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000808 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000809 if (maxsplit < 0)
810 maxsplit = INT_MAX;
811 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000812 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000813 if (PyString_Check(subobj)) {
814 sub = PyString_AS_STRING(subobj);
815 n = PyString_GET_SIZE(subobj);
816 }
817 else if (PyUnicode_Check(subobj))
818 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
819 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
820 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821 if (n == 0) {
822 PyErr_SetString(PyExc_ValueError, "empty separator");
823 return NULL;
824 }
825
826 list = PyList_New(0);
827 if (list == NULL)
828 return NULL;
829
830 i = j = 0;
831 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000832 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000833 if (maxsplit-- <= 0)
834 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000835 item = PyString_FromStringAndSize(s+j, (int)(i-j));
836 if (item == NULL)
837 goto fail;
838 err = PyList_Append(list, item);
839 Py_DECREF(item);
840 if (err < 0)
841 goto fail;
842 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000843 }
844 else
845 i++;
846 }
847 item = PyString_FromStringAndSize(s+j, (int)(len-j));
848 if (item == NULL)
849 goto fail;
850 err = PyList_Append(list, item);
851 Py_DECREF(item);
852 if (err < 0)
853 goto fail;
854
855 return list;
856
857 fail:
858 Py_DECREF(list);
859 return NULL;
860}
861
862
863static char join__doc__[] =
864"S.join(sequence) -> string\n\
865\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000866Return a string which is the concatenation of the strings in the\n\
867sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000868
869static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000870string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000871{
872 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000873 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000874 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000875 char *p;
876 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000877 size_t sz = 0;
878 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000879 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000880
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000881 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000882 return NULL;
883
Tim Peters19fe14e2001-01-19 03:03:47 +0000884 seq = PySequence_Fast(orig, "");
885 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000886 if (PyErr_ExceptionMatches(PyExc_TypeError))
887 PyErr_Format(PyExc_TypeError,
888 "sequence expected, %.80s found",
889 orig->ob_type->tp_name);
890 return NULL;
891 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000892
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000893 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000894 if (seqlen == 0) {
895 Py_DECREF(seq);
896 return PyString_FromString("");
897 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000898 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000899 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000900 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
901 PyErr_Format(PyExc_TypeError,
902 "sequence item 0: expected string,"
903 " %.80s found",
904 item->ob_type->tp_name);
905 Py_DECREF(seq);
906 return NULL;
907 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000908 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000909 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000910 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000911 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000912
Tim Peters19fe14e2001-01-19 03:03:47 +0000913 /* There are at least two things to join. Do a pre-pass to figure out
914 * the total amount of space we'll need (sz), see whether any argument
915 * is absurd, and defer to the Unicode join if appropriate.
916 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000917 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000918 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000919 item = PySequence_Fast_GET_ITEM(seq, i);
920 if (!PyString_Check(item)){
921 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000922 /* Defer to Unicode join.
923 * CAUTION: There's no gurantee that the
924 * original sequence can be iterated over
925 * again, so we must pass seq here.
926 */
927 PyObject *result;
928 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000929 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +0000930 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000931 }
932 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000933 "sequence item %i: expected string,"
934 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000935 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000936 Py_DECREF(seq);
937 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000938 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000939 sz += PyString_GET_SIZE(item);
940 if (i != 0)
941 sz += seplen;
942 if (sz < old_sz || sz > INT_MAX) {
943 PyErr_SetString(PyExc_OverflowError,
944 "join() is too long for a Python string");
945 Py_DECREF(seq);
946 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000947 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000948 }
949
950 /* Allocate result space. */
951 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
952 if (res == NULL) {
953 Py_DECREF(seq);
954 return NULL;
955 }
956
957 /* Catenate everything. */
958 p = PyString_AS_STRING(res);
959 for (i = 0; i < seqlen; ++i) {
960 size_t n;
961 item = PySequence_Fast_GET_ITEM(seq, i);
962 n = PyString_GET_SIZE(item);
963 memcpy(p, PyString_AS_STRING(item), n);
964 p += n;
965 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000966 memcpy(p, sep, seplen);
967 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000968 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000969 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000970
Jeremy Hylton49048292000-07-11 03:28:17 +0000971 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000972 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000973}
974
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000975static long
Fred Drakeba096332000-07-09 07:04:36 +0000976string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000977{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000978 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000979 int len = PyString_GET_SIZE(self);
980 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000981 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000982
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000983 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +0000984 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000985 return -2;
986 if (PyString_Check(subobj)) {
987 sub = PyString_AS_STRING(subobj);
988 n = PyString_GET_SIZE(subobj);
989 }
990 else if (PyUnicode_Check(subobj))
991 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
992 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000993 return -2;
994
995 if (last > len)
996 last = len;
997 if (last < 0)
998 last += len;
999 if (last < 0)
1000 last = 0;
1001 if (i < 0)
1002 i += len;
1003 if (i < 0)
1004 i = 0;
1005
Guido van Rossum4c08d552000-03-10 22:55:18 +00001006 if (dir > 0) {
1007 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001008 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001009 last -= n;
1010 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +00001011 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001012 return (long)i;
1013 }
1014 else {
1015 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001016
Guido van Rossum4c08d552000-03-10 22:55:18 +00001017 if (n == 0 && i <= last)
1018 return (long)last;
1019 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +00001020 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001021 return (long)j;
1022 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001023
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001024 return -1;
1025}
1026
1027
1028static char find__doc__[] =
1029"S.find(sub [,start [,end]]) -> int\n\
1030\n\
1031Return the lowest index in S where substring sub is found,\n\
1032such that sub is contained within s[start,end]. Optional\n\
1033arguments start and end are interpreted as in slice notation.\n\
1034\n\
1035Return -1 on failure.";
1036
1037static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001038string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001039{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001040 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001041 if (result == -2)
1042 return NULL;
1043 return PyInt_FromLong(result);
1044}
1045
1046
1047static char index__doc__[] =
1048"S.index(sub [,start [,end]]) -> int\n\
1049\n\
1050Like S.find() but raise ValueError when the substring is not found.";
1051
1052static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001053string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001054{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001055 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001056 if (result == -2)
1057 return NULL;
1058 if (result == -1) {
1059 PyErr_SetString(PyExc_ValueError,
1060 "substring not found in string.index");
1061 return NULL;
1062 }
1063 return PyInt_FromLong(result);
1064}
1065
1066
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001067static char rfind__doc__[] =
1068"S.rfind(sub [,start [,end]]) -> int\n\
1069\n\
1070Return the highest index in S where substring sub is found,\n\
1071such that sub is contained within s[start,end]. Optional\n\
1072arguments start and end are interpreted as in slice notation.\n\
1073\n\
1074Return -1 on failure.";
1075
1076static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001077string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001078{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001079 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001080 if (result == -2)
1081 return NULL;
1082 return PyInt_FromLong(result);
1083}
1084
1085
1086static char rindex__doc__[] =
1087"S.rindex(sub [,start [,end]]) -> int\n\
1088\n\
1089Like S.rfind() but raise ValueError when the substring is not found.";
1090
1091static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001092string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001093{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001094 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001095 if (result == -2)
1096 return NULL;
1097 if (result == -1) {
1098 PyErr_SetString(PyExc_ValueError,
1099 "substring not found in string.rindex");
1100 return NULL;
1101 }
1102 return PyInt_FromLong(result);
1103}
1104
1105
1106static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001107do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001108{
1109 char *s = PyString_AS_STRING(self);
1110 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001111
Guido van Rossum43713e52000-02-29 13:59:29 +00001112 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113 return NULL;
1114
1115 i = 0;
1116 if (striptype != RIGHTSTRIP) {
1117 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1118 i++;
1119 }
1120 }
1121
1122 j = len;
1123 if (striptype != LEFTSTRIP) {
1124 do {
1125 j--;
1126 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1127 j++;
1128 }
1129
1130 if (i == 0 && j == len) {
1131 Py_INCREF(self);
1132 return (PyObject*)self;
1133 }
1134 else
1135 return PyString_FromStringAndSize(s+i, j-i);
1136}
1137
1138
1139static char strip__doc__[] =
1140"S.strip() -> string\n\
1141\n\
1142Return a copy of the string S with leading and trailing\n\
1143whitespace removed.";
1144
1145static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001146string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001147{
1148 return do_strip(self, args, BOTHSTRIP);
1149}
1150
1151
1152static char lstrip__doc__[] =
1153"S.lstrip() -> string\n\
1154\n\
1155Return a copy of the string S with leading whitespace removed.";
1156
1157static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001158string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001159{
1160 return do_strip(self, args, LEFTSTRIP);
1161}
1162
1163
1164static char rstrip__doc__[] =
1165"S.rstrip() -> string\n\
1166\n\
1167Return a copy of the string S with trailing whitespace removed.";
1168
1169static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001170string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001171{
1172 return do_strip(self, args, RIGHTSTRIP);
1173}
1174
1175
1176static char lower__doc__[] =
1177"S.lower() -> string\n\
1178\n\
1179Return a copy of the string S converted to lowercase.";
1180
1181static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001182string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001183{
1184 char *s = PyString_AS_STRING(self), *s_new;
1185 int i, n = PyString_GET_SIZE(self);
1186 PyObject *new;
1187
Guido van Rossum43713e52000-02-29 13:59:29 +00001188 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001189 return NULL;
1190 new = PyString_FromStringAndSize(NULL, n);
1191 if (new == NULL)
1192 return NULL;
1193 s_new = PyString_AsString(new);
1194 for (i = 0; i < n; i++) {
1195 int c = Py_CHARMASK(*s++);
1196 if (isupper(c)) {
1197 *s_new = tolower(c);
1198 } else
1199 *s_new = c;
1200 s_new++;
1201 }
1202 return new;
1203}
1204
1205
1206static char upper__doc__[] =
1207"S.upper() -> string\n\
1208\n\
1209Return a copy of the string S converted to uppercase.";
1210
1211static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001212string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001213{
1214 char *s = PyString_AS_STRING(self), *s_new;
1215 int i, n = PyString_GET_SIZE(self);
1216 PyObject *new;
1217
Guido van Rossum43713e52000-02-29 13:59:29 +00001218 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001219 return NULL;
1220 new = PyString_FromStringAndSize(NULL, n);
1221 if (new == NULL)
1222 return NULL;
1223 s_new = PyString_AsString(new);
1224 for (i = 0; i < n; i++) {
1225 int c = Py_CHARMASK(*s++);
1226 if (islower(c)) {
1227 *s_new = toupper(c);
1228 } else
1229 *s_new = c;
1230 s_new++;
1231 }
1232 return new;
1233}
1234
1235
Guido van Rossum4c08d552000-03-10 22:55:18 +00001236static char title__doc__[] =
1237"S.title() -> string\n\
1238\n\
1239Return a titlecased version of S, i.e. words start with uppercase\n\
1240characters, all remaining cased characters have lowercase.";
1241
1242static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001243string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001244{
1245 char *s = PyString_AS_STRING(self), *s_new;
1246 int i, n = PyString_GET_SIZE(self);
1247 int previous_is_cased = 0;
1248 PyObject *new;
1249
1250 if (!PyArg_ParseTuple(args, ":title"))
1251 return NULL;
1252 new = PyString_FromStringAndSize(NULL, n);
1253 if (new == NULL)
1254 return NULL;
1255 s_new = PyString_AsString(new);
1256 for (i = 0; i < n; i++) {
1257 int c = Py_CHARMASK(*s++);
1258 if (islower(c)) {
1259 if (!previous_is_cased)
1260 c = toupper(c);
1261 previous_is_cased = 1;
1262 } else if (isupper(c)) {
1263 if (previous_is_cased)
1264 c = tolower(c);
1265 previous_is_cased = 1;
1266 } else
1267 previous_is_cased = 0;
1268 *s_new++ = c;
1269 }
1270 return new;
1271}
1272
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001273static char capitalize__doc__[] =
1274"S.capitalize() -> string\n\
1275\n\
1276Return a copy of the string S with only its first character\n\
1277capitalized.";
1278
1279static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001280string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001281{
1282 char *s = PyString_AS_STRING(self), *s_new;
1283 int i, n = PyString_GET_SIZE(self);
1284 PyObject *new;
1285
Guido van Rossum43713e52000-02-29 13:59:29 +00001286 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001287 return NULL;
1288 new = PyString_FromStringAndSize(NULL, n);
1289 if (new == NULL)
1290 return NULL;
1291 s_new = PyString_AsString(new);
1292 if (0 < n) {
1293 int c = Py_CHARMASK(*s++);
1294 if (islower(c))
1295 *s_new = toupper(c);
1296 else
1297 *s_new = c;
1298 s_new++;
1299 }
1300 for (i = 1; i < n; i++) {
1301 int c = Py_CHARMASK(*s++);
1302 if (isupper(c))
1303 *s_new = tolower(c);
1304 else
1305 *s_new = c;
1306 s_new++;
1307 }
1308 return new;
1309}
1310
1311
1312static char count__doc__[] =
1313"S.count(sub[, start[, end]]) -> int\n\
1314\n\
1315Return the number of occurrences of substring sub in string\n\
1316S[start:end]. Optional arguments start and end are\n\
1317interpreted as in slice notation.";
1318
1319static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001320string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001321{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001322 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001323 int len = PyString_GET_SIZE(self), n;
1324 int i = 0, last = INT_MAX;
1325 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001326 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001327
Guido van Rossumc6821402000-05-08 14:08:05 +00001328 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1329 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001331
Guido van Rossum4c08d552000-03-10 22:55:18 +00001332 if (PyString_Check(subobj)) {
1333 sub = PyString_AS_STRING(subobj);
1334 n = PyString_GET_SIZE(subobj);
1335 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001336 else if (PyUnicode_Check(subobj)) {
1337 int count;
1338 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1339 if (count == -1)
1340 return NULL;
1341 else
1342 return PyInt_FromLong((long) count);
1343 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001344 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1345 return NULL;
1346
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001347 if (last > len)
1348 last = len;
1349 if (last < 0)
1350 last += len;
1351 if (last < 0)
1352 last = 0;
1353 if (i < 0)
1354 i += len;
1355 if (i < 0)
1356 i = 0;
1357 m = last + 1 - n;
1358 if (n == 0)
1359 return PyInt_FromLong((long) (m-i));
1360
1361 r = 0;
1362 while (i < m) {
1363 if (!memcmp(s+i, sub, n)) {
1364 r++;
1365 i += n;
1366 } else {
1367 i++;
1368 }
1369 }
1370 return PyInt_FromLong((long) r);
1371}
1372
1373
1374static char swapcase__doc__[] =
1375"S.swapcase() -> string\n\
1376\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001377Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001378converted to lowercase and vice versa.";
1379
1380static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001381string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382{
1383 char *s = PyString_AS_STRING(self), *s_new;
1384 int i, n = PyString_GET_SIZE(self);
1385 PyObject *new;
1386
Guido van Rossum43713e52000-02-29 13:59:29 +00001387 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001388 return NULL;
1389 new = PyString_FromStringAndSize(NULL, n);
1390 if (new == NULL)
1391 return NULL;
1392 s_new = PyString_AsString(new);
1393 for (i = 0; i < n; i++) {
1394 int c = Py_CHARMASK(*s++);
1395 if (islower(c)) {
1396 *s_new = toupper(c);
1397 }
1398 else if (isupper(c)) {
1399 *s_new = tolower(c);
1400 }
1401 else
1402 *s_new = c;
1403 s_new++;
1404 }
1405 return new;
1406}
1407
1408
1409static char translate__doc__[] =
1410"S.translate(table [,deletechars]) -> string\n\
1411\n\
1412Return a copy of the string S, where all characters occurring\n\
1413in the optional argument deletechars are removed, and the\n\
1414remaining characters have been mapped through the given\n\
1415translation table, which must be a string of length 256.";
1416
1417static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001418string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001419{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001420 register char *input, *output;
1421 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001422 register int i, c, changed = 0;
1423 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001424 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001425 int inlen, tablen, dellen = 0;
1426 PyObject *result;
1427 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001428 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001429
Guido van Rossum4c08d552000-03-10 22:55:18 +00001430 if (!PyArg_ParseTuple(args, "O|O:translate",
1431 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001432 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001433
1434 if (PyString_Check(tableobj)) {
1435 table1 = PyString_AS_STRING(tableobj);
1436 tablen = PyString_GET_SIZE(tableobj);
1437 }
1438 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001439 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001440 parameter; instead a mapping to None will cause characters
1441 to be deleted. */
1442 if (delobj != NULL) {
1443 PyErr_SetString(PyExc_TypeError,
1444 "deletions are implemented differently for unicode");
1445 return NULL;
1446 }
1447 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1448 }
1449 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001450 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001451
1452 if (delobj != NULL) {
1453 if (PyString_Check(delobj)) {
1454 del_table = PyString_AS_STRING(delobj);
1455 dellen = PyString_GET_SIZE(delobj);
1456 }
1457 else if (PyUnicode_Check(delobj)) {
1458 PyErr_SetString(PyExc_TypeError,
1459 "deletions are implemented differently for unicode");
1460 return NULL;
1461 }
1462 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1463 return NULL;
1464
1465 if (tablen != 256) {
1466 PyErr_SetString(PyExc_ValueError,
1467 "translation table must be 256 characters long");
1468 return NULL;
1469 }
1470 }
1471 else {
1472 del_table = NULL;
1473 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001474 }
1475
1476 table = table1;
1477 inlen = PyString_Size(input_obj);
1478 result = PyString_FromStringAndSize((char *)NULL, inlen);
1479 if (result == NULL)
1480 return NULL;
1481 output_start = output = PyString_AsString(result);
1482 input = PyString_AsString(input_obj);
1483
1484 if (dellen == 0) {
1485 /* If no deletions are required, use faster code */
1486 for (i = inlen; --i >= 0; ) {
1487 c = Py_CHARMASK(*input++);
1488 if (Py_CHARMASK((*output++ = table[c])) != c)
1489 changed = 1;
1490 }
1491 if (changed)
1492 return result;
1493 Py_DECREF(result);
1494 Py_INCREF(input_obj);
1495 return input_obj;
1496 }
1497
1498 for (i = 0; i < 256; i++)
1499 trans_table[i] = Py_CHARMASK(table[i]);
1500
1501 for (i = 0; i < dellen; i++)
1502 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1503
1504 for (i = inlen; --i >= 0; ) {
1505 c = Py_CHARMASK(*input++);
1506 if (trans_table[c] != -1)
1507 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1508 continue;
1509 changed = 1;
1510 }
1511 if (!changed) {
1512 Py_DECREF(result);
1513 Py_INCREF(input_obj);
1514 return input_obj;
1515 }
1516 /* Fix the size of the resulting string */
1517 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1518 return NULL;
1519 return result;
1520}
1521
1522
/* What follows is used for implementing replace().  Perry Stoll. */

/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Scans the LEN bytes at MEM for the first occurrence of the PAT_LEN bytes
  at PAT and returns its index into MEM, or -1 if there is none.  When the
  pattern is longer than the memory block the result is always -1.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start within the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
1550
/*
  mymemcnt

  Return the number of non-overlapping occurrences of PAT in MEM, found
  by repeatedly searching from just past the previous match.
  meaning mem=1111 and pat==11 returns 2.
          mem=11111 and pat==11 also return 2.

  An empty pattern (PAT_LEN <= 0) is reported as occurring 0 times.
  Without that guard every search would succeed without consuming any
  input and the loop below would never terminate.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int offset = 0;
    int nfound = 0;

    if (pat_len <= 0)
        return 0;

    while (len >= 0) {
        offset = mymemfind(mem, len, pat, pat_len);
        if (offset == -1)
            break;
        /* Resume immediately after this match. */
        mem += offset + pat_len;
        len -= offset + pat_len;
        nfound++;
    }
    return nfound;
}
1574
/*
   mymemreplace

   Return a string in which all occurrences of PAT in memory STR are
   replaced with SUB.  A non-negative COUNT limits the number of
   replacements; a negative COUNT means "no limit".

   If STR is empty, the length of PAT is greater than the length of STR,
   or there are no occurrences of PAT in STR, then the original string is
   returned.  Otherwise, a new string is allocated here (with
   PyMem_MALLOC) and returned; it is the caller's to free.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or result too long).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.

   Fix: the output length len + nfound*(sub_len - pat_len) could overflow
   int when the substitution is longer than the pattern, producing a
   too-small allocation that the copy loop then overran.  That case is now
   detected up front and reported as an error (NULL).
   NOTE(review): the caller is outside this view; presumably it treats
   NULL as an allocation failure -- confirm the exception raised is
   acceptable for the "too long" case.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;
    int delta;          /* size change caused by one replacement */

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    delta = sub_len - pat_len;
    /* Growth is the only direction that can overflow: when delta is
       negative the total shrinkage is at most len. */
    if (delta > 0 && nfound > (INT_MAX - len) / delta)
        return NULL;
    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1659
1660
1661static char replace__doc__[] =
1662"S.replace (old, new[, maxsplit]) -> string\n\
1663\n\
1664Return a copy of string S with all occurrences of substring\n\
1665old replaced by new. If the optional argument maxsplit is\n\
1666given, only the first maxsplit occurrences are replaced.";
1667
1668static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001669string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671 const char *str = PyString_AS_STRING(self), *sub, *repl;
1672 char *new_s;
1673 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1674 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001675 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001676 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001677
Guido van Rossum4c08d552000-03-10 22:55:18 +00001678 if (!PyArg_ParseTuple(args, "OO|i:replace",
1679 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001680 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001681
1682 if (PyString_Check(subobj)) {
1683 sub = PyString_AS_STRING(subobj);
1684 sub_len = PyString_GET_SIZE(subobj);
1685 }
1686 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001687 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001688 subobj, replobj, count);
1689 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1690 return NULL;
1691
1692 if (PyString_Check(replobj)) {
1693 repl = PyString_AS_STRING(replobj);
1694 repl_len = PyString_GET_SIZE(replobj);
1695 }
1696 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001697 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001698 subobj, replobj, count);
1699 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1700 return NULL;
1701
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001702 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001703 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001704 return NULL;
1705 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001706 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001707 if (new_s == NULL) {
1708 PyErr_NoMemory();
1709 return NULL;
1710 }
1711 if (out_len == -1) {
1712 /* we're returning another reference to self */
1713 new = (PyObject*)self;
1714 Py_INCREF(new);
1715 }
1716 else {
1717 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001718 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001719 }
1720 return new;
1721}
1722
1723
1724static char startswith__doc__[] =
1725"S.startswith(prefix[, start[, end]]) -> int\n\
1726\n\
1727Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1728optional start, test S beginning at that position. With optional end, stop\n\
1729comparing S at that position.";
1730
1731static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001732string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001733{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001734 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001735 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001736 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001737 int plen;
1738 int start = 0;
1739 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001740 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001741
Guido van Rossumc6821402000-05-08 14:08:05 +00001742 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1743 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001744 return NULL;
1745 if (PyString_Check(subobj)) {
1746 prefix = PyString_AS_STRING(subobj);
1747 plen = PyString_GET_SIZE(subobj);
1748 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001749 else if (PyUnicode_Check(subobj)) {
1750 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001751 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001752 subobj, start, end, -1);
1753 if (rc == -1)
1754 return NULL;
1755 else
1756 return PyInt_FromLong((long) rc);
1757 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001758 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759 return NULL;
1760
1761 /* adopt Java semantics for index out of range. it is legal for
1762 * offset to be == plen, but this only returns true if prefix is
1763 * the empty string.
1764 */
1765 if (start < 0 || start+plen > len)
1766 return PyInt_FromLong(0);
1767
1768 if (!memcmp(str+start, prefix, plen)) {
1769 /* did the match end after the specified end? */
1770 if (end < 0)
1771 return PyInt_FromLong(1);
1772 else if (end - start < plen)
1773 return PyInt_FromLong(0);
1774 else
1775 return PyInt_FromLong(1);
1776 }
1777 else return PyInt_FromLong(0);
1778}
1779
1780
1781static char endswith__doc__[] =
1782"S.endswith(suffix[, start[, end]]) -> int\n\
1783\n\
1784Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1785optional start, test S beginning at that position. With optional end, stop\n\
1786comparing S at that position.";
1787
1788static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001789string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001790{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001791 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001792 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001793 const char* suffix;
1794 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001795 int start = 0;
1796 int end = -1;
1797 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001798 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001799
Guido van Rossumc6821402000-05-08 14:08:05 +00001800 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1801 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001802 return NULL;
1803 if (PyString_Check(subobj)) {
1804 suffix = PyString_AS_STRING(subobj);
1805 slen = PyString_GET_SIZE(subobj);
1806 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001807 else if (PyUnicode_Check(subobj)) {
1808 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001809 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001810 subobj, start, end, +1);
1811 if (rc == -1)
1812 return NULL;
1813 else
1814 return PyInt_FromLong((long) rc);
1815 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001816 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001817 return NULL;
1818
Guido van Rossum4c08d552000-03-10 22:55:18 +00001819 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001820 return PyInt_FromLong(0);
1821
1822 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001823 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001824
Guido van Rossum4c08d552000-03-10 22:55:18 +00001825 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001826 return PyInt_FromLong(1);
1827 else return PyInt_FromLong(0);
1828}
1829
1830
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001831static char encode__doc__[] =
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001832"S.encode([encoding[,errors]]) -> object\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001833\n\
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001834Encodes S using the codec registered for encoding. encoding defaults\n\
1835to the default encoding. errors may be given to set a different error\n\
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001836handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1837a ValueError. Other possible values are 'ignore' and 'replace'.";
1838
1839static PyObject *
1840string_encode(PyStringObject *self, PyObject *args)
1841{
1842 char *encoding = NULL;
1843 char *errors = NULL;
1844 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1845 return NULL;
Marc-André Lemburg2d920412001-05-15 12:00:02 +00001846 return PyString_AsEncodedObject((PyObject *)self, encoding, errors);
1847}
1848
1849
1850static char decode__doc__[] =
1851"S.decode([encoding[,errors]]) -> object\n\
1852\n\
1853Decodes S using the codec registered for encoding. encoding defaults\n\
1854to the default encoding. errors may be given to set a different error\n\
1855handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1856a ValueError. Other possible values are 'ignore' and 'replace'.";
1857
1858static PyObject *
1859string_decode(PyStringObject *self, PyObject *args)
1860{
1861 char *encoding = NULL;
1862 char *errors = NULL;
1863 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
1864 return NULL;
1865 return PyString_AsDecodedObject((PyObject *)self, encoding, errors);
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001866}
1867
1868
Guido van Rossum4c08d552000-03-10 22:55:18 +00001869static char expandtabs__doc__[] =
1870"S.expandtabs([tabsize]) -> string\n\
1871\n\
1872Return a copy of S where all tab characters are expanded using spaces.\n\
1873If tabsize is not given, a tab size of 8 characters is assumed.";
1874
1875static PyObject*
1876string_expandtabs(PyStringObject *self, PyObject *args)
1877{
1878 const char *e, *p;
1879 char *q;
1880 int i, j;
1881 PyObject *u;
1882 int tabsize = 8;
1883
1884 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1885 return NULL;
1886
Thomas Wouters7e474022000-07-16 12:04:32 +00001887 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001888 i = j = 0;
1889 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1890 for (p = PyString_AS_STRING(self); p < e; p++)
1891 if (*p == '\t') {
1892 if (tabsize > 0)
1893 j += tabsize - (j % tabsize);
1894 }
1895 else {
1896 j++;
1897 if (*p == '\n' || *p == '\r') {
1898 i += j;
1899 j = 0;
1900 }
1901 }
1902
1903 /* Second pass: create output string and fill it */
1904 u = PyString_FromStringAndSize(NULL, i + j);
1905 if (!u)
1906 return NULL;
1907
1908 j = 0;
1909 q = PyString_AS_STRING(u);
1910
1911 for (p = PyString_AS_STRING(self); p < e; p++)
1912 if (*p == '\t') {
1913 if (tabsize > 0) {
1914 i = tabsize - (j % tabsize);
1915 j += i;
1916 while (i--)
1917 *q++ = ' ';
1918 }
1919 }
1920 else {
1921 j++;
1922 *q++ = *p;
1923 if (*p == '\n' || *p == '\r')
1924 j = 0;
1925 }
1926
1927 return u;
1928}
1929
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001930static
1931PyObject *pad(PyStringObject *self,
1932 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001933 int right,
1934 char fill)
1935{
1936 PyObject *u;
1937
1938 if (left < 0)
1939 left = 0;
1940 if (right < 0)
1941 right = 0;
1942
1943 if (left == 0 && right == 0) {
1944 Py_INCREF(self);
1945 return (PyObject *)self;
1946 }
1947
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001948 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001949 left + PyString_GET_SIZE(self) + right);
1950 if (u) {
1951 if (left)
1952 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001953 memcpy(PyString_AS_STRING(u) + left,
1954 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00001955 PyString_GET_SIZE(self));
1956 if (right)
1957 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1958 fill, right);
1959 }
1960
1961 return u;
1962}
1963
1964static char ljust__doc__[] =
1965"S.ljust(width) -> string\n\
1966\n\
1967Return S left justified in a string of length width. Padding is\n\
1968done using spaces.";
1969
1970static PyObject *
1971string_ljust(PyStringObject *self, PyObject *args)
1972{
1973 int width;
1974 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1975 return NULL;
1976
1977 if (PyString_GET_SIZE(self) >= width) {
1978 Py_INCREF(self);
1979 return (PyObject*) self;
1980 }
1981
1982 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1983}
1984
1985
1986static char rjust__doc__[] =
1987"S.rjust(width) -> string\n\
1988\n\
1989Return S right justified in a string of length width. Padding is\n\
1990done using spaces.";
1991
1992static PyObject *
1993string_rjust(PyStringObject *self, PyObject *args)
1994{
1995 int width;
1996 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1997 return NULL;
1998
1999 if (PyString_GET_SIZE(self) >= width) {
2000 Py_INCREF(self);
2001 return (PyObject*) self;
2002 }
2003
2004 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
2005}
2006
2007
2008static char center__doc__[] =
2009"S.center(width) -> string\n\
2010\n\
2011Return S centered in a string of length width. Padding is done\n\
2012using spaces.";
2013
2014static PyObject *
2015string_center(PyStringObject *self, PyObject *args)
2016{
2017 int marg, left;
2018 int width;
2019
2020 if (!PyArg_ParseTuple(args, "i:center", &width))
2021 return NULL;
2022
2023 if (PyString_GET_SIZE(self) >= width) {
2024 Py_INCREF(self);
2025 return (PyObject*) self;
2026 }
2027
2028 marg = width - PyString_GET_SIZE(self);
2029 left = marg / 2 + (marg & width & 1);
2030
2031 return pad(self, left, marg - left, ' ');
2032}
2033
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width); compiled out by the enclosing
   #if 0.  Pads with '0' on the left up to width characters and, if the
   original string starts with '+' or '-', moves that sign in front of
   the zeros.  A string that is already at least width long is returned
   as a new reference to self. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int nzeros;
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	nzeros = width - PyString_GET_SIZE(self);

	padded = pad(self, nzeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[nzeros] is the first character of the original string */
	buf = PyString_AS_STRING(padded);
	switch (buf[nzeros]) {
	case '+':
	case '-':
		/* move sign to beginning of string */
		buf[0] = buf[nzeros];
		buf[nzeros] = '0';
		break;
	default:
		break;
	}

	return padded;
}
#endif
2073
2074static char isspace__doc__[] =
2075"S.isspace() -> int\n\
2076\n\
2077Return 1 if there are only whitespace characters in S,\n\
20780 otherwise.";
2079
2080static PyObject*
2081string_isspace(PyStringObject *self, PyObject *args)
2082{
Fred Drakeba096332000-07-09 07:04:36 +00002083 register const unsigned char *p
2084 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002085 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002086
2087 if (!PyArg_NoArgs(args))
2088 return NULL;
2089
2090 /* Shortcut for single character strings */
2091 if (PyString_GET_SIZE(self) == 1 &&
2092 isspace(*p))
2093 return PyInt_FromLong(1);
2094
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002095 /* Special case for empty strings */
2096 if (PyString_GET_SIZE(self) == 0)
2097 return PyInt_FromLong(0);
2098
Guido van Rossum4c08d552000-03-10 22:55:18 +00002099 e = p + PyString_GET_SIZE(self);
2100 for (; p < e; p++) {
2101 if (!isspace(*p))
2102 return PyInt_FromLong(0);
2103 }
2104 return PyInt_FromLong(1);
2105}
2106
2107
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002108static char isalpha__doc__[] =
2109"S.isalpha() -> int\n\
2110\n\
2111Return 1 if all characters in S are alphabetic\n\
2112and there is at least one character in S, 0 otherwise.";
2113
2114static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002115string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002116{
Fred Drakeba096332000-07-09 07:04:36 +00002117 register const unsigned char *p
2118 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002119 register const unsigned char *e;
2120
2121 if (!PyArg_NoArgs(args))
2122 return NULL;
2123
2124 /* Shortcut for single character strings */
2125 if (PyString_GET_SIZE(self) == 1 &&
2126 isalpha(*p))
2127 return PyInt_FromLong(1);
2128
2129 /* Special case for empty strings */
2130 if (PyString_GET_SIZE(self) == 0)
2131 return PyInt_FromLong(0);
2132
2133 e = p + PyString_GET_SIZE(self);
2134 for (; p < e; p++) {
2135 if (!isalpha(*p))
2136 return PyInt_FromLong(0);
2137 }
2138 return PyInt_FromLong(1);
2139}
2140
2141
2142static char isalnum__doc__[] =
2143"S.isalnum() -> int\n\
2144\n\
2145Return 1 if all characters in S are alphanumeric\n\
2146and there is at least one character in S, 0 otherwise.";
2147
2148static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002149string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002150{
Fred Drakeba096332000-07-09 07:04:36 +00002151 register const unsigned char *p
2152 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002153 register const unsigned char *e;
2154
2155 if (!PyArg_NoArgs(args))
2156 return NULL;
2157
2158 /* Shortcut for single character strings */
2159 if (PyString_GET_SIZE(self) == 1 &&
2160 isalnum(*p))
2161 return PyInt_FromLong(1);
2162
2163 /* Special case for empty strings */
2164 if (PyString_GET_SIZE(self) == 0)
2165 return PyInt_FromLong(0);
2166
2167 e = p + PyString_GET_SIZE(self);
2168 for (; p < e; p++) {
2169 if (!isalnum(*p))
2170 return PyInt_FromLong(0);
2171 }
2172 return PyInt_FromLong(1);
2173}
2174
2175
Guido van Rossum4c08d552000-03-10 22:55:18 +00002176static char isdigit__doc__[] =
2177"S.isdigit() -> int\n\
2178\n\
2179Return 1 if there are only digit characters in S,\n\
21800 otherwise.";
2181
2182static PyObject*
2183string_isdigit(PyStringObject *self, PyObject *args)
2184{
Fred Drakeba096332000-07-09 07:04:36 +00002185 register const unsigned char *p
2186 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002187 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188
2189 if (!PyArg_NoArgs(args))
2190 return NULL;
2191
2192 /* Shortcut for single character strings */
2193 if (PyString_GET_SIZE(self) == 1 &&
2194 isdigit(*p))
2195 return PyInt_FromLong(1);
2196
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002197 /* Special case for empty strings */
2198 if (PyString_GET_SIZE(self) == 0)
2199 return PyInt_FromLong(0);
2200
Guido van Rossum4c08d552000-03-10 22:55:18 +00002201 e = p + PyString_GET_SIZE(self);
2202 for (; p < e; p++) {
2203 if (!isdigit(*p))
2204 return PyInt_FromLong(0);
2205 }
2206 return PyInt_FromLong(1);
2207}
2208
2209
2210static char islower__doc__[] =
2211"S.islower() -> int\n\
2212\n\
2213Return 1 if all cased characters in S are lowercase and there is\n\
2214at least one cased character in S, 0 otherwise.";
2215
2216static PyObject*
2217string_islower(PyStringObject *self, PyObject *args)
2218{
Fred Drakeba096332000-07-09 07:04:36 +00002219 register const unsigned char *p
2220 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002221 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002222 int cased;
2223
2224 if (!PyArg_NoArgs(args))
2225 return NULL;
2226
2227 /* Shortcut for single character strings */
2228 if (PyString_GET_SIZE(self) == 1)
2229 return PyInt_FromLong(islower(*p) != 0);
2230
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002231 /* Special case for empty strings */
2232 if (PyString_GET_SIZE(self) == 0)
2233 return PyInt_FromLong(0);
2234
Guido van Rossum4c08d552000-03-10 22:55:18 +00002235 e = p + PyString_GET_SIZE(self);
2236 cased = 0;
2237 for (; p < e; p++) {
2238 if (isupper(*p))
2239 return PyInt_FromLong(0);
2240 else if (!cased && islower(*p))
2241 cased = 1;
2242 }
2243 return PyInt_FromLong(cased);
2244}
2245
2246
2247static char isupper__doc__[] =
2248"S.isupper() -> int\n\
2249\n\
2250Return 1 if all cased characters in S are uppercase and there is\n\
2251at least one cased character in S, 0 otherwise.";
2252
2253static PyObject*
2254string_isupper(PyStringObject *self, PyObject *args)
2255{
Fred Drakeba096332000-07-09 07:04:36 +00002256 register const unsigned char *p
2257 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002258 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002259 int cased;
2260
2261 if (!PyArg_NoArgs(args))
2262 return NULL;
2263
2264 /* Shortcut for single character strings */
2265 if (PyString_GET_SIZE(self) == 1)
2266 return PyInt_FromLong(isupper(*p) != 0);
2267
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002268 /* Special case for empty strings */
2269 if (PyString_GET_SIZE(self) == 0)
2270 return PyInt_FromLong(0);
2271
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 e = p + PyString_GET_SIZE(self);
2273 cased = 0;
2274 for (; p < e; p++) {
2275 if (islower(*p))
2276 return PyInt_FromLong(0);
2277 else if (!cased && isupper(*p))
2278 cased = 1;
2279 }
2280 return PyInt_FromLong(cased);
2281}
2282
2283
2284static char istitle__doc__[] =
2285"S.istitle() -> int\n\
2286\n\
2287Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2288may only follow uncased characters and lowercase characters only cased\n\
2289ones. Return 0 otherwise.";
2290
2291static PyObject*
2292string_istitle(PyStringObject *self, PyObject *args)
2293{
Fred Drakeba096332000-07-09 07:04:36 +00002294 register const unsigned char *p
2295 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002296 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002297 int cased, previous_is_cased;
2298
2299 if (!PyArg_NoArgs(args))
2300 return NULL;
2301
2302 /* Shortcut for single character strings */
2303 if (PyString_GET_SIZE(self) == 1)
2304 return PyInt_FromLong(isupper(*p) != 0);
2305
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002306 /* Special case for empty strings */
2307 if (PyString_GET_SIZE(self) == 0)
2308 return PyInt_FromLong(0);
2309
Guido van Rossum4c08d552000-03-10 22:55:18 +00002310 e = p + PyString_GET_SIZE(self);
2311 cased = 0;
2312 previous_is_cased = 0;
2313 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002314 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002315
2316 if (isupper(ch)) {
2317 if (previous_is_cased)
2318 return PyInt_FromLong(0);
2319 previous_is_cased = 1;
2320 cased = 1;
2321 }
2322 else if (islower(ch)) {
2323 if (!previous_is_cased)
2324 return PyInt_FromLong(0);
2325 previous_is_cased = 1;
2326 cased = 1;
2327 }
2328 else
2329 previous_is_cased = 0;
2330 }
2331 return PyInt_FromLong(cased);
2332}
2333
2334
2335static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002336"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002337\n\
2338Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002339Line breaks are not included in the resulting list unless keepends\n\
2340is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002341
2342#define SPLIT_APPEND(data, left, right) \
2343 str = PyString_FromStringAndSize(data + left, right - left); \
2344 if (!str) \
2345 goto onError; \
2346 if (PyList_Append(list, str)) { \
2347 Py_DECREF(str); \
2348 goto onError; \
2349 } \
2350 else \
2351 Py_DECREF(str);
2352
2353static PyObject*
2354string_splitlines(PyStringObject *self, PyObject *args)
2355{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002356 register int i;
2357 register int j;
2358 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002359 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002360 PyObject *list;
2361 PyObject *str;
2362 char *data;
2363
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002364 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002365 return NULL;
2366
2367 data = PyString_AS_STRING(self);
2368 len = PyString_GET_SIZE(self);
2369
Guido van Rossum4c08d552000-03-10 22:55:18 +00002370 list = PyList_New(0);
2371 if (!list)
2372 goto onError;
2373
2374 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002375 int eol;
2376
Guido van Rossum4c08d552000-03-10 22:55:18 +00002377 /* Find a line and append it */
2378 while (i < len && data[i] != '\n' && data[i] != '\r')
2379 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002380
2381 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002382 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002383 if (i < len) {
2384 if (data[i] == '\r' && i + 1 < len &&
2385 data[i+1] == '\n')
2386 i += 2;
2387 else
2388 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002389 if (keepends)
2390 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002391 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002392 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002393 j = i;
2394 }
2395 if (j < len) {
2396 SPLIT_APPEND(data, j, len);
2397 }
2398
2399 return list;
2400
2401 onError:
2402 Py_DECREF(list);
2403 return NULL;
2404}
2405
2406#undef SPLIT_APPEND
2407
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002408
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002409static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002410string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002411 /* Counterparts of the obsolete stropmodule functions; except
2412 string.maketrans(). */
2413 {"join", (PyCFunction)string_join, 1, join__doc__},
2414 {"split", (PyCFunction)string_split, 1, split__doc__},
2415 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2416 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2417 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2418 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2419 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2420 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2421 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002422 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2423 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002424 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2425 {"count", (PyCFunction)string_count, 1, count__doc__},
2426 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2427 {"find", (PyCFunction)string_find, 1, find__doc__},
2428 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002429 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002430 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2431 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2432 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2433 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002434 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2435 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2436 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002437 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2438 {"title", (PyCFunction)string_title, 1, title__doc__},
2439 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2440 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2441 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002442 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Marc-André Lemburg2d920412001-05-15 12:00:02 +00002443 {"decode", (PyCFunction)string_decode, 1, decode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002444 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2445 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2446#if 0
2447 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2448#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002449 {NULL, NULL} /* sentinel */
2450};
2451
2452static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002453string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002454{
2455 return Py_FindMethod(string_methods, (PyObject*)s, name);
2456}
2457
2458
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002459PyTypeObject PyString_Type = {
2460 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002461 0,
2462 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002463 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002464 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002465 (destructor)string_dealloc, /*tp_dealloc*/
2466 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002467 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002468 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002469 (cmpfunc)string_compare, /*tp_compare*/
2470 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002471 0, /*tp_as_number*/
2472 &string_as_sequence, /*tp_as_sequence*/
2473 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002474 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002475 0, /*tp_call*/
Guido van Rossum189f1df2001-05-01 16:51:53 +00002476 (reprfunc)string_str, /*tp_str*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002477 0, /*tp_getattro*/
2478 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002479 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002480 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002481 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002482};
2483
2484void
Fred Drakeba096332000-07-09 07:04:36 +00002485PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002486{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002487 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002488 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002489 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002490 if (w == NULL || !PyString_Check(*pv)) {
2491 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002492 *pv = NULL;
2493 return;
2494 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002495 v = string_concat((PyStringObject *) *pv, w);
2496 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002497 *pv = v;
2498}
2499
Guido van Rossum013142a1994-08-30 08:19:36 +00002500void
Fred Drakeba096332000-07-09 07:04:36 +00002501PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002502{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002503 PyString_Concat(pv, w);
2504 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002505}
2506
2507
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002508/* The following function breaks the notion that strings are immutable:
2509 it changes the size of a string. We get away with this only if there
2510 is only one module referencing the object. You can also think of it
2511 as creating a new string object and destroying the old one, only
2512 more efficiently. In any case, don't use this if the string may
2513 already be known to some other part of the code... */
2514
2515int
Fred Drakeba096332000-07-09 07:04:36 +00002516_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002517{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002518 register PyObject *v;
2519 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002520 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002521 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002522 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002523 Py_DECREF(v);
2524 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002525 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002526 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002527 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002528#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002529 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002530#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002531 _Py_ForgetReference(v);
2532 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002533 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002534 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002535 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002536 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002537 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002538 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002539 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002540 _Py_NewReference(*pv);
2541 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002542 sv->ob_size = newsize;
2543 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002544 return 0;
2545}
Guido van Rossume5372401993-03-16 12:15:04 +00002546
2547/* Helpers for formatstring */
2548
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002549static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002550getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002551{
2552 int argidx = *p_argidx;
2553 if (argidx < arglen) {
2554 (*p_argidx)++;
2555 if (arglen < 0)
2556 return args;
2557 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002558 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002559 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002560 PyErr_SetString(PyExc_TypeError,
2561 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002562 return NULL;
2563}
2564
/* Format flag bits.  Each bit records that the flag character shown next
   to it was given in a % format specifier. */
#define F_LJUST	(1 << 0)	/* '-' */
#define F_SIGN	(1 << 1)	/* '+' */
#define F_BLANK	(1 << 2)	/* ' ' */
#define F_ALT	(1 << 3)	/* '#' */
#define F_ZERO	(1 << 4)	/* '0' */
2577
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002578static int
Fred Drakeba096332000-07-09 07:04:36 +00002579formatfloat(char *buf, size_t buflen, int flags,
2580 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002581{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002582 /* fmt = '%#.' + `prec` + `type`
2583 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002584 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002585 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002586 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002587 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002588 if (prec < 0)
2589 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002590 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2591 type = 'g';
2592 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002593 /* worst case length calc to ensure no buffer overrun:
2594 fmt = %#.<prec>g
2595 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002596 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002597 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2598 If prec=0 the effective precision is 1 (the leading digit is
2599 always given), therefore increase by one to 10+prec. */
2600 if (buflen <= (size_t)10 + (size_t)prec) {
2601 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002602 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002603 return -1;
2604 }
Guido van Rossume5372401993-03-16 12:15:04 +00002605 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002606 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002607}
2608
Tim Peters38fd5b62000-09-21 05:43:11 +00002609/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2610 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2611 * Python's regular ints.
2612 * Return value: a new PyString*, or NULL if error.
2613 * . *pbuf is set to point into it,
2614 * *plen set to the # of chars following that.
2615 * Caller must decref it when done using pbuf.
2616 * The string starting at *pbuf is of the form
2617 * "-"? ("0x" | "0X")? digit+
2618 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002619 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002620 * There will be at least prec digits, zero-filled on the left if
2621 * necessary to get that many.
2622 * val object to be converted
2623 * flags bitmask of format flags; only F_ALT is looked at
2624 * prec minimum number of digits; 0-fill on left if needed
2625 * type a character in [duoxX]; u acts the same as d
2626 *
2627 * CAUTION: o, x and X conversions on regular ints can never
2628 * produce a '-' sign, but can for Python's unbounded ints.
2629 */
2630PyObject*
2631_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2632 char **pbuf, int *plen)
2633{
2634 PyObject *result = NULL;
2635 char *buf;
2636 int i;
2637 int sign; /* 1 if '-', else 0 */
2638 int len; /* number of characters */
2639 int numdigits; /* len == numnondigits + numdigits */
2640 int numnondigits = 0;
2641
2642 switch (type) {
2643 case 'd':
2644 case 'u':
2645 result = val->ob_type->tp_str(val);
2646 break;
2647 case 'o':
2648 result = val->ob_type->tp_as_number->nb_oct(val);
2649 break;
2650 case 'x':
2651 case 'X':
2652 numnondigits = 2;
2653 result = val->ob_type->tp_as_number->nb_hex(val);
2654 break;
2655 default:
2656 assert(!"'type' not in [duoxX]");
2657 }
2658 if (!result)
2659 return NULL;
2660
2661 /* To modify the string in-place, there can only be one reference. */
2662 if (result->ob_refcnt != 1) {
2663 PyErr_BadInternalCall();
2664 return NULL;
2665 }
2666 buf = PyString_AsString(result);
2667 len = PyString_Size(result);
2668 if (buf[len-1] == 'L') {
2669 --len;
2670 buf[len] = '\0';
2671 }
2672 sign = buf[0] == '-';
2673 numnondigits += sign;
2674 numdigits = len - numnondigits;
2675 assert(numdigits > 0);
2676
Tim Petersfff53252001-04-12 18:38:48 +00002677 /* Get rid of base marker unless F_ALT */
2678 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002679 /* Need to skip 0x, 0X or 0. */
2680 int skipped = 0;
2681 switch (type) {
2682 case 'o':
2683 assert(buf[sign] == '0');
2684 /* If 0 is only digit, leave it alone. */
2685 if (numdigits > 1) {
2686 skipped = 1;
2687 --numdigits;
2688 }
2689 break;
2690 case 'x':
2691 case 'X':
2692 assert(buf[sign] == '0');
2693 assert(buf[sign + 1] == 'x');
2694 skipped = 2;
2695 numnondigits -= 2;
2696 break;
2697 }
2698 if (skipped) {
2699 buf += skipped;
2700 len -= skipped;
2701 if (sign)
2702 buf[0] = '-';
2703 }
2704 assert(len == numnondigits + numdigits);
2705 assert(numdigits > 0);
2706 }
2707
2708 /* Fill with leading zeroes to meet minimum width. */
2709 if (prec > numdigits) {
2710 PyObject *r1 = PyString_FromStringAndSize(NULL,
2711 numnondigits + prec);
2712 char *b1;
2713 if (!r1) {
2714 Py_DECREF(result);
2715 return NULL;
2716 }
2717 b1 = PyString_AS_STRING(r1);
2718 for (i = 0; i < numnondigits; ++i)
2719 *b1++ = *buf++;
2720 for (i = 0; i < prec - numdigits; i++)
2721 *b1++ = '0';
2722 for (i = 0; i < numdigits; i++)
2723 *b1++ = *buf++;
2724 *b1 = '\0';
2725 Py_DECREF(result);
2726 result = r1;
2727 buf = PyString_AS_STRING(result);
2728 len = numnondigits + prec;
2729 }
2730
2731 /* Fix up case for hex conversions. */
2732 switch (type) {
2733 case 'x':
2734 /* Need to convert all upper case letters to lower case. */
2735 for (i = 0; i < len; i++)
2736 if (buf[i] >= 'A' && buf[i] <= 'F')
2737 buf[i] += 'a'-'A';
2738 break;
2739 case 'X':
2740 /* Need to convert 0x to 0X (and -0x to -0X). */
2741 if (buf[sign + 1] == 'x')
2742 buf[sign + 1] = 'X';
2743 break;
2744 }
2745 *pbuf = buf;
2746 *plen = len;
2747 return result;
2748}
2749
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002750static int
Fred Drakeba096332000-07-09 07:04:36 +00002751formatint(char *buf, size_t buflen, int flags,
2752 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002753{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002754 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002755 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2756 + 1 + 1 = 24 */
2757 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002758 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002759 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002760 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002761 if (prec < 0)
2762 prec = 1;
2763 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002764 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002765 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002766 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002767 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002768 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002769 return -1;
2770 }
Guido van Rossume5372401993-03-16 12:15:04 +00002771 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002772 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2773 * but we want it (for consistency with other %#x conversions, and
2774 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002775 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2776 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2777 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002778 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002779 if (x == 0 &&
2780 (flags & F_ALT) &&
2781 (type == 'x' || type == 'X') &&
2782 buf[1] != (char)type) /* this last always true under std C */
2783 {
Tim Petersfff53252001-04-12 18:38:48 +00002784 memmove(buf+2, buf, strlen(buf) + 1);
2785 buf[0] = '0';
2786 buf[1] = (char)type;
2787 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002788 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002789}
2790
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002791static int
Fred Drakeba096332000-07-09 07:04:36 +00002792formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002793{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002794 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002795 if (PyString_Check(v)) {
2796 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002797 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002798 }
2799 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002800 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002801 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002802 }
2803 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002804 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002805}
2806
Guido van Rossum013142a1994-08-30 08:19:36 +00002807
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the size of the scratch buffer in which floats, ints
   and chars are formatted.  XXX It is a magic number.  Every formatting
   routine bounds-checks against it so the buffer cannot overflow, but
   allocating a buffer of the appropriate size for each format might be
   the better solution.  For now, the current solution is sufficient.
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002817
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002818PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002819PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002820{
2821 char *fmt, *res;
2822 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002823 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002824 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002825 PyObject *dict = NULL;
2826 if (format == NULL || !PyString_Check(format) || args == NULL) {
2827 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002828 return NULL;
2829 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002830 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002831 fmt = PyString_AsString(format);
2832 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002833 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002834 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002835 if (result == NULL)
2836 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002837 res = PyString_AsString(result);
2838 if (PyTuple_Check(args)) {
2839 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002840 argidx = 0;
2841 }
2842 else {
2843 arglen = -1;
2844 argidx = -2;
2845 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002846 if (args->ob_type->tp_as_mapping)
2847 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002848 while (--fmtcnt >= 0) {
2849 if (*fmt != '%') {
2850 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002851 rescnt = fmtcnt + 100;
2852 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002853 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002854 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002855 res = PyString_AsString(result)
2856 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002857 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002858 }
2859 *res++ = *fmt++;
2860 }
2861 else {
2862 /* Got a format specifier */
2863 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002864 int width = -1;
2865 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00002866 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002867 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002868 PyObject *v = NULL;
2869 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002870 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002871 int sign;
2872 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002873 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002874 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002875 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002876
Guido van Rossumda9c2711996-12-05 21:58:58 +00002877 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002878 if (*fmt == '(') {
2879 char *keystart;
2880 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002881 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002882 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002883
2884 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002885 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002886 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002887 goto error;
2888 }
2889 ++fmt;
2890 --fmtcnt;
2891 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002892 /* Skip over balanced parentheses */
2893 while (pcount > 0 && --fmtcnt >= 0) {
2894 if (*fmt == ')')
2895 --pcount;
2896 else if (*fmt == '(')
2897 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002898 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002899 }
2900 keylen = fmt - keystart - 1;
2901 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002902 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002903 "incomplete format key");
2904 goto error;
2905 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002906 key = PyString_FromStringAndSize(keystart,
2907 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002908 if (key == NULL)
2909 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002910 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002911 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002912 args_owned = 0;
2913 }
2914 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002915 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002916 if (args == NULL) {
2917 goto error;
2918 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002919 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002920 arglen = -1;
2921 argidx = -2;
2922 }
Guido van Rossume5372401993-03-16 12:15:04 +00002923 while (--fmtcnt >= 0) {
2924 switch (c = *fmt++) {
2925 case '-': flags |= F_LJUST; continue;
2926 case '+': flags |= F_SIGN; continue;
2927 case ' ': flags |= F_BLANK; continue;
2928 case '#': flags |= F_ALT; continue;
2929 case '0': flags |= F_ZERO; continue;
2930 }
2931 break;
2932 }
2933 if (c == '*') {
2934 v = getnextarg(args, arglen, &argidx);
2935 if (v == NULL)
2936 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002937 if (!PyInt_Check(v)) {
2938 PyErr_SetString(PyExc_TypeError,
2939 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002940 goto error;
2941 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002942 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002943 if (width < 0) {
2944 flags |= F_LJUST;
2945 width = -width;
2946 }
Guido van Rossume5372401993-03-16 12:15:04 +00002947 if (--fmtcnt >= 0)
2948 c = *fmt++;
2949 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002950 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002951 width = c - '0';
2952 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002953 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002954 if (!isdigit(c))
2955 break;
2956 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002957 PyErr_SetString(
2958 PyExc_ValueError,
2959 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002960 goto error;
2961 }
2962 width = width*10 + (c - '0');
2963 }
2964 }
2965 if (c == '.') {
2966 prec = 0;
2967 if (--fmtcnt >= 0)
2968 c = *fmt++;
2969 if (c == '*') {
2970 v = getnextarg(args, arglen, &argidx);
2971 if (v == NULL)
2972 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002973 if (!PyInt_Check(v)) {
2974 PyErr_SetString(
2975 PyExc_TypeError,
2976 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002977 goto error;
2978 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002979 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002980 if (prec < 0)
2981 prec = 0;
2982 if (--fmtcnt >= 0)
2983 c = *fmt++;
2984 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002985 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002986 prec = c - '0';
2987 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002988 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002989 if (!isdigit(c))
2990 break;
2991 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002992 PyErr_SetString(
2993 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002994 "prec too big");
2995 goto error;
2996 }
2997 prec = prec*10 + (c - '0');
2998 }
2999 }
3000 } /* prec */
3001 if (fmtcnt >= 0) {
3002 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00003003 if (--fmtcnt >= 0)
3004 c = *fmt++;
3005 }
3006 }
3007 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003008 PyErr_SetString(PyExc_ValueError,
3009 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00003010 goto error;
3011 }
3012 if (c != '%') {
3013 v = getnextarg(args, arglen, &argidx);
3014 if (v == NULL)
3015 goto error;
3016 }
3017 sign = 0;
3018 fill = ' ';
3019 switch (c) {
3020 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003021 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00003022 len = 1;
3023 break;
3024 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00003025 case 'r':
3026 if (PyUnicode_Check(v)) {
3027 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003028 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00003029 goto unicode;
3030 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003031 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003032 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00003033 else
3034 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00003035 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00003036 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00003037 if (!PyString_Check(temp)) {
3038 PyErr_SetString(PyExc_TypeError,
3039 "%s argument has non-string str()");
3040 goto error;
3041 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003042 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003043 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003044 if (prec >= 0 && len > prec)
3045 len = prec;
3046 break;
3047 case 'i':
3048 case 'd':
3049 case 'u':
3050 case 'o':
3051 case 'x':
3052 case 'X':
3053 if (c == 'i')
3054 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00003055 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00003056 temp = _PyString_FormatLong(v, flags,
3057 prec, c, &pbuf, &len);
3058 if (!temp)
3059 goto error;
3060 /* unbounded ints can always produce
3061 a sign character! */
3062 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00003063 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003064 else {
3065 pbuf = formatbuf;
3066 len = formatint(pbuf, sizeof(formatbuf),
3067 flags, prec, c, v);
3068 if (len < 0)
3069 goto error;
3070 /* only d conversion is signed */
3071 sign = c == 'd';
3072 }
3073 if (flags & F_ZERO)
3074 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003075 break;
3076 case 'e':
3077 case 'E':
3078 case 'f':
3079 case 'g':
3080 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003081 pbuf = formatbuf;
3082 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003083 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003084 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003085 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003086 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003087 fill = '0';
3088 break;
3089 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003090 pbuf = formatbuf;
3091 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003092 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003093 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003094 break;
3095 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003096 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003097 "unsupported format character '%c' (0x%x) "
3098 "at index %i",
3099 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003100 goto error;
3101 }
3102 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003103 if (*pbuf == '-' || *pbuf == '+') {
3104 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003105 len--;
3106 }
3107 else if (flags & F_SIGN)
3108 sign = '+';
3109 else if (flags & F_BLANK)
3110 sign = ' ';
3111 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003112 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003113 }
3114 if (width < len)
3115 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003116 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003117 reslen -= rescnt;
3118 rescnt = width + fmtcnt + 100;
3119 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003120 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003121 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003122 res = PyString_AsString(result)
3123 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003124 }
3125 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003126 if (fill != ' ')
3127 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003128 rescnt--;
3129 if (width > len)
3130 width--;
3131 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003132 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3133 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003134 assert(pbuf[1] == c);
3135 if (fill != ' ') {
3136 *res++ = *pbuf++;
3137 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003138 }
Tim Petersfff53252001-04-12 18:38:48 +00003139 rescnt -= 2;
3140 width -= 2;
3141 if (width < 0)
3142 width = 0;
3143 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003144 }
3145 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003146 do {
3147 --rescnt;
3148 *res++ = fill;
3149 } while (--width > len);
3150 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003151 if (fill == ' ') {
3152 if (sign)
3153 *res++ = sign;
3154 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003155 (c == 'x' || c == 'X')) {
3156 assert(pbuf[0] == '0');
3157 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003158 *res++ = *pbuf++;
3159 *res++ = *pbuf++;
3160 }
3161 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003162 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003163 res += len;
3164 rescnt -= len;
3165 while (--width >= len) {
3166 --rescnt;
3167 *res++ = ' ';
3168 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003169 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003170 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003171 "not all arguments converted");
3172 goto error;
3173 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003174 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003175 } /* '%' */
3176 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003177 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003178 PyErr_SetString(PyExc_TypeError,
3179 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003180 goto error;
3181 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003182 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003183 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003184 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003185 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003186 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003187
3188 unicode:
3189 if (args_owned) {
3190 Py_DECREF(args);
3191 args_owned = 0;
3192 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003193 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003194 if (PyTuple_Check(orig_args) && argidx > 0) {
3195 PyObject *v;
3196 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3197 v = PyTuple_New(n);
3198 if (v == NULL)
3199 goto error;
3200 while (--n >= 0) {
3201 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3202 Py_INCREF(w);
3203 PyTuple_SET_ITEM(v, n, w);
3204 }
3205 args = v;
3206 } else {
3207 Py_INCREF(orig_args);
3208 args = orig_args;
3209 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003210 args_owned = 1;
3211 /* Take what we have of the result and let the Unicode formatting
3212 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003213 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003214 if (_PyString_Resize(&result, rescnt))
3215 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003216 fmtcnt = PyString_GET_SIZE(format) - \
3217 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003218 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3219 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003220 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003221 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003222 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003223 if (v == NULL)
3224 goto error;
3225 /* Paste what we have (result) to what the Unicode formatting
3226 function returned (v) and return the result (or error) */
3227 w = PyUnicode_Concat(result, v);
3228 Py_DECREF(result);
3229 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003230 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003231 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003232
Guido van Rossume5372401993-03-16 12:15:04 +00003233 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003234 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003235 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003236 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003237 }
Guido van Rossume5372401993-03-16 12:15:04 +00003238 return NULL;
3239}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003240
3241
3242#ifdef INTERN_STRINGS
3243
/* Dictionary of interned strings.  PyString_InternInPlace() creates it
 * on first use and enters each interned string as both key and value.
 *
 * This dictionary will leak at PyString_Fini() time.  That's acceptable
 * because PyString_Fini() specifically frees interned strings that are
 * only referenced by this dictionary.  The CVS log entry for revision 2.45
 * says:
 *
 *    Change the Fini function to only remove otherwise unreferenced
 *    strings from the interned table.  There are references in
 *    hard-to-find static variables all over the interpreter, and it's not
 *    worth trying to get rid of all those; but "uninterning" isn't fair
 *    either and may cause subtle failures later -- so we have to keep them
 *    in the interned table.
 */
static PyObject *interned;
3257
3258void
Fred Drakeba096332000-07-09 07:04:36 +00003259PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003260{
3261 register PyStringObject *s = (PyStringObject *)(*p);
3262 PyObject *t;
3263 if (s == NULL || !PyString_Check(s))
3264 Py_FatalError("PyString_InternInPlace: strings only please!");
3265 if ((t = s->ob_sinterned) != NULL) {
3266 if (t == (PyObject *)s)
3267 return;
3268 Py_INCREF(t);
3269 *p = t;
3270 Py_DECREF(s);
3271 return;
3272 }
3273 if (interned == NULL) {
3274 interned = PyDict_New();
3275 if (interned == NULL)
3276 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003277 }
3278 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3279 Py_INCREF(t);
3280 *p = s->ob_sinterned = t;
3281 Py_DECREF(s);
3282 return;
3283 }
3284 t = (PyObject *)s;
3285 if (PyDict_SetItem(interned, t, t) == 0) {
3286 s->ob_sinterned = t;
3287 return;
3288 }
3289 PyErr_Clear();
3290}
3291
3292
3293PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003294PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003295{
3296 PyObject *s = PyString_FromString(cp);
3297 if (s == NULL)
3298 return NULL;
3299 PyString_InternInPlace(&s);
3300 return s;
3301}
3302
3303#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003304
3305void
Fred Drakeba096332000-07-09 07:04:36 +00003306PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003307{
3308 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003309 for (i = 0; i < UCHAR_MAX + 1; i++) {
3310 Py_XDECREF(characters[i]);
3311 characters[i] = NULL;
3312 }
3313#ifndef DONT_SHARE_SHORT_STRINGS
3314 Py_XDECREF(nullstring);
3315 nullstring = NULL;
3316#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003317#ifdef INTERN_STRINGS
3318 if (interned) {
3319 int pos, changed;
3320 PyObject *key, *value;
3321 do {
3322 changed = 0;
3323 pos = 0;
3324 while (PyDict_Next(interned, &pos, &key, &value)) {
3325 if (key->ob_refcnt == 2 && key == value) {
3326 PyDict_DelItem(interned, key);
3327 changed = 1;
3328 }
3329 }
3330 } while (changed);
3331 }
3332#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003333}
Barry Warsawa903ad982001-02-23 16:40:48 +00003334
3335#ifdef INTERN_STRINGS
3336void _Py_ReleaseInternedStrings(void)
3337{
3338 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003339 fprintf(stderr, "releasing interned strings\n");
3340 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003341 Py_DECREF(interned);
3342 interned = NULL;
3343 }
3344}
3345#endif /* INTERN_STRINGS */