blob: 7600c0378be0153eb06e1cb61bb2f36cd5d1ba65 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects.  When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known.  For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully.  It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in the
   future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Tim Peters9e897f42001-05-09 07:37:07 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +000076 PyObject *t = (PyObject *)op;
77 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000078 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000079 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 } else if (size == 1 && str != NULL) {
Tim Peters9e897f42001-05-09 07:37:07 +000082 PyObject *t = (PyObject *)op;
83 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +000084 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000085 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000087 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000088#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000089 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000090}
91
Guido van Rossumc0b618a1997-05-02 03:12:38 +000092PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000093PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000094{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000095 register size_t size = strlen(str);
Tim Peters9e897f42001-05-09 07:37:07 +000096 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000097 if (size > INT_MAX) {
98 PyErr_SetString(PyExc_OverflowError,
99 "string is too long for a Python string");
100 return NULL;
101 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000102#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 if (size == 0 && (op = nullstring) != NULL) {
104#ifdef COUNT_ALLOCS
105 null_strings++;
106#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000107 Py_INCREF(op);
108 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000109 }
110 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
111#ifdef COUNT_ALLOCS
112 one_strings++;
113#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 Py_INCREF(op);
115 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000116 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000117#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118
119 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000120 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000121 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000122 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000123 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000124 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000125#ifdef CACHE_HASH
126 op->ob_shash = -1;
127#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000128#ifdef INTERN_STRINGS
129 op->ob_sinterned = NULL;
130#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000131 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000132#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 if (size == 0) {
Tim Peters9e897f42001-05-09 07:37:07 +0000134 PyObject *t = (PyObject *)op;
135 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000136 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000137 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000138 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000139 } else if (size == 1) {
Tim Peters9e897f42001-05-09 07:37:07 +0000140 PyObject *t = (PyObject *)op;
141 PyString_InternInPlace(&t);
Tim Peters4862ab72001-05-09 08:43:21 +0000142 op = (PyStringObject *)t;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000143 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000144 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000145 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000146#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000147 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000148}
149
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000150PyObject *PyString_Decode(const char *s,
151 int size,
152 const char *encoding,
153 const char *errors)
154{
155 PyObject *buffer = NULL, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000156
157 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000158 encoding = PyUnicode_GetDefaultEncoding();
159
160 /* Decode via the codec registry */
161 buffer = PyBuffer_FromMemory((void *)s, size);
162 if (buffer == NULL)
163 goto onError;
164 str = PyCodec_Decode(buffer, encoding, errors);
165 if (str == NULL)
166 goto onError;
167 /* Convert Unicode to a string using the default encoding */
168 if (PyUnicode_Check(str)) {
169 PyObject *temp = str;
170 str = PyUnicode_AsEncodedString(str, NULL, NULL);
171 Py_DECREF(temp);
172 if (str == NULL)
173 goto onError;
174 }
175 if (!PyString_Check(str)) {
176 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000177 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000178 str->ob_type->tp_name);
179 Py_DECREF(str);
180 goto onError;
181 }
182 Py_DECREF(buffer);
183 return str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000184
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000185 onError:
186 Py_XDECREF(buffer);
187 return NULL;
188}
189
190PyObject *PyString_Encode(const char *s,
191 int size,
192 const char *encoding,
193 const char *errors)
194{
195 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000196
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000197 str = PyString_FromStringAndSize(s, size);
198 if (str == NULL)
199 return NULL;
200 v = PyString_AsEncodedString(str, encoding, errors);
201 Py_DECREF(str);
202 return v;
203}
204
205PyObject *PyString_AsEncodedString(PyObject *str,
206 const char *encoding,
207 const char *errors)
208{
209 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000210
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000211 if (!PyString_Check(str)) {
212 PyErr_BadArgument();
213 goto onError;
214 }
215
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000216 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000217 encoding = PyUnicode_GetDefaultEncoding();
218
219 /* Encode via the codec registry */
220 v = PyCodec_Encode(str, encoding, errors);
221 if (v == NULL)
222 goto onError;
223 /* Convert Unicode to a string using the default encoding */
224 if (PyUnicode_Check(v)) {
225 PyObject *temp = v;
226 v = PyUnicode_AsEncodedString(v, NULL, NULL);
227 Py_DECREF(temp);
228 if (v == NULL)
229 goto onError;
230 }
231 if (!PyString_Check(v)) {
232 PyErr_Format(PyExc_TypeError,
233 "encoder did not return a string object (type=%.400s)",
234 v->ob_type->tp_name);
235 Py_DECREF(v);
236 goto onError;
237 }
238 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000239
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000240 onError:
241 return NULL;
242}
243
Guido van Rossum234f9421993-06-17 12:35:49 +0000244static void
Fred Drakeba096332000-07-09 07:04:36 +0000245string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000246{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000247 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000248}
249
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000250static int
251string_getsize(register PyObject *op)
252{
253 char *s;
254 int len;
255 if (PyString_AsStringAndSize(op, &s, &len))
256 return -1;
257 return len;
258}
259
260static /*const*/ char *
261string_getbuffer(register PyObject *op)
262{
263 char *s;
264 int len;
265 if (PyString_AsStringAndSize(op, &s, &len))
266 return NULL;
267 return s;
268}
269
Guido van Rossumd7047b31995-01-02 19:07:15 +0000270int
Fred Drakeba096332000-07-09 07:04:36 +0000271PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000273 if (!PyString_Check(op))
274 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000275 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000276}
277
278/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000279PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000280{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000281 if (!PyString_Check(op))
282 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000283 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000284}
285
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000286/* Internal API needed by PyString_AsStringAndSize(): */
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000287extern
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000288PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
289 const char *errors);
290
291int
292PyString_AsStringAndSize(register PyObject *obj,
293 register char **s,
294 register int *len)
295{
296 if (s == NULL) {
297 PyErr_BadInternalCall();
298 return -1;
299 }
300
301 if (!PyString_Check(obj)) {
302 if (PyUnicode_Check(obj)) {
303 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
304 if (obj == NULL)
305 return -1;
306 }
307 else {
308 PyErr_Format(PyExc_TypeError,
309 "expected string or Unicode object, "
310 "%.200s found", obj->ob_type->tp_name);
311 return -1;
312 }
313 }
314
315 *s = PyString_AS_STRING(obj);
316 if (len != NULL)
317 *len = PyString_GET_SIZE(obj);
318 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
319 PyErr_SetString(PyExc_TypeError,
320 "expected string without null bytes");
321 return -1;
322 }
323 return 0;
324}
325
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326/* Methods */
327
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000328static int
Fred Drakeba096332000-07-09 07:04:36 +0000329string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000330{
331 int i;
332 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000333 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000334 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000335 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000336 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000337 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000338 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000339
Thomas Wouters7e474022000-07-16 12:04:32 +0000340 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000341 quote = '\'';
342 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
343 quote = '"';
344
345 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000346 for (i = 0; i < op->ob_size; i++) {
347 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000348 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000349 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000350 else if (c == '\t')
351 fprintf(fp, "\\t");
352 else if (c == '\n')
353 fprintf(fp, "\\n");
354 else if (c == '\r')
355 fprintf(fp, "\\r");
356 else if (c < ' ' || c >= 0x7f)
357 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000358 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000359 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000360 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000361 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000362 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363}
364
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000365static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000366string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000367{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000368 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
369 PyObject *v;
370 if (newsize > INT_MAX) {
371 PyErr_SetString(PyExc_OverflowError,
372 "string is too large to make repr");
373 }
374 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000375 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000376 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000377 }
378 else {
379 register int i;
380 register char c;
381 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000382 int quote;
383
Thomas Wouters7e474022000-07-16 12:04:32 +0000384 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000385 quote = '\'';
386 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
387 quote = '"';
388
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000389 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000390 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000391 for (i = 0; i < op->ob_size; i++) {
392 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000393 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000394 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000395 else if (c == '\t')
396 *p++ = '\\', *p++ = 't';
397 else if (c == '\n')
398 *p++ = '\\', *p++ = 'n';
399 else if (c == '\r')
400 *p++ = '\\', *p++ = 'r';
401 else if (c < ' ' || c >= 0x7f) {
402 sprintf(p, "\\x%02x", c & 0xff);
403 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000404 }
405 else
406 *p++ = c;
407 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000408 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000409 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000410 _PyString_Resize(
411 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000412 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000414}
415
Guido van Rossum189f1df2001-05-01 16:51:53 +0000416static PyObject *
417string_str(PyObject *s)
418{
419 Py_INCREF(s);
420 return s;
421}
422
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000423static int
Fred Drakeba096332000-07-09 07:04:36 +0000424string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000425{
426 return a->ob_size;
427}
428
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000429static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000430string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000431{
432 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 register PyStringObject *op;
434 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000435 if (PyUnicode_Check(bb))
436 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000437 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000438 "cannot add type \"%.200s\" to string",
439 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000440 return NULL;
441 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000443 /* Optimize cases with empty left or right operand */
444 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000445 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000446 return bb;
447 }
448 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000449 Py_INCREF(a);
450 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000451 }
452 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000453 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000454 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000455 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000456 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000457 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000458 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000459#ifdef CACHE_HASH
460 op->ob_shash = -1;
461#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000462#ifdef INTERN_STRINGS
463 op->ob_sinterned = NULL;
464#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000465 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
466 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
467 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000468 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000469#undef b
470}
471
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000472static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000473string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000474{
475 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000476 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000477 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000478 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000479 if (n < 0)
480 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000481 /* watch out for overflows: the size can overflow int,
482 * and the # of bytes needed can overflow size_t
483 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000484 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000485 if (n && size / n != a->ob_size) {
486 PyErr_SetString(PyExc_OverflowError,
487 "repeated string is too long");
488 return NULL;
489 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000490 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000491 Py_INCREF(a);
492 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000493 }
Tim Peters8f422462000-09-09 06:13:41 +0000494 nbytes = size * sizeof(char);
495 if (nbytes / sizeof(char) != (size_t)size ||
496 nbytes + sizeof(PyStringObject) <= nbytes) {
497 PyErr_SetString(PyExc_OverflowError,
498 "repeated string is too long");
499 return NULL;
500 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000501 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000502 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000503 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000504 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000505 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000506#ifdef CACHE_HASH
507 op->ob_shash = -1;
508#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000509#ifdef INTERN_STRINGS
510 op->ob_sinterned = NULL;
511#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000512 for (i = 0; i < size; i += a->ob_size)
513 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
514 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000515 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000516}
517
518/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
519
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000520static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000521string_slice(register PyStringObject *a, register int i, register int j)
522 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000523{
524 if (i < 0)
525 i = 0;
526 if (j < 0)
527 j = 0; /* Avoid signed/unsigned bug in next line */
528 if (j > a->ob_size)
529 j = a->ob_size;
530 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000531 Py_INCREF(a);
532 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000533 }
534 if (j < i)
535 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000536 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000537}
538
Guido van Rossum9284a572000-03-07 15:53:43 +0000539static int
Fred Drakeba096332000-07-09 07:04:36 +0000540string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000541{
542 register char *s, *end;
543 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000544 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000545 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000546 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000547 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000548 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000549 return -1;
550 }
551 c = PyString_AsString(el)[0];
552 s = PyString_AsString(a);
553 end = s + PyString_Size(a);
554 while (s < end) {
555 if (c == *s++)
556 return 1;
557 }
558 return 0;
559}
560
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000561static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000562string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000563{
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000564 PyObject *v;
Tim Peters5b4d4772001-05-08 22:33:50 +0000565 char *pchar;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000566 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000567 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000568 return NULL;
569 }
Tim Peters5b4d4772001-05-08 22:33:50 +0000570 pchar = a->ob_sval + i;
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000571 v = (PyObject *)characters[*pchar & UCHAR_MAX];
Tim Peters5b4d4772001-05-08 22:33:50 +0000572 if (v == NULL)
573 v = PyString_FromStringAndSize(pchar, 1);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000574 else {
575#ifdef COUNT_ALLOCS
576 one_strings++;
577#endif
Tim Peterscf5ad5d2001-05-09 00:24:55 +0000578 Py_INCREF(v);
Tim Petersb4bbcd72001-05-09 00:31:40 +0000579 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000580 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000581}
582
583static int
Fred Drakeba096332000-07-09 07:04:36 +0000584string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000585{
Guido van Rossum253919f1991-02-13 23:18:39 +0000586 int len_a = a->ob_size, len_b = b->ob_size;
587 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000588 int cmp;
589 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000590 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000591 if (cmp == 0)
592 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
593 if (cmp != 0)
594 return cmp;
595 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000596 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000597}
598
Guido van Rossum9bfef441993-03-29 10:43:31 +0000599static long
Fred Drakeba096332000-07-09 07:04:36 +0000600string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000601{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000602 register int len;
603 register unsigned char *p;
604 register long x;
605
606#ifdef CACHE_HASH
607 if (a->ob_shash != -1)
608 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000609#ifdef INTERN_STRINGS
610 if (a->ob_sinterned != NULL)
611 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000612 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000613#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000614#endif
615 len = a->ob_size;
616 p = (unsigned char *) a->ob_sval;
617 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000618 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000619 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000620 x ^= a->ob_size;
621 if (x == -1)
622 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000623#ifdef CACHE_HASH
624 a->ob_shash = x;
625#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000626 return x;
627}
628
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000629static int
Fred Drakeba096332000-07-09 07:04:36 +0000630string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000631{
632 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000633 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000634 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000635 return -1;
636 }
637 *ptr = (void *)self->ob_sval;
638 return self->ob_size;
639}
640
641static int
Fred Drakeba096332000-07-09 07:04:36 +0000642string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000643{
Guido van Rossum045e6881997-09-08 18:30:11 +0000644 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000645 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000646 return -1;
647}
648
649static int
Fred Drakeba096332000-07-09 07:04:36 +0000650string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000651{
652 if ( lenp )
653 *lenp = self->ob_size;
654 return 1;
655}
656
Guido van Rossum1db70701998-10-08 02:18:52 +0000657static int
Fred Drakeba096332000-07-09 07:04:36 +0000658string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000659{
660 if ( index != 0 ) {
661 PyErr_SetString(PyExc_SystemError,
662 "accessing non-existent string segment");
663 return -1;
664 }
665 *ptr = self->ob_sval;
666 return self->ob_size;
667}
668
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000669static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000670 (inquiry)string_length, /*sq_length*/
671 (binaryfunc)string_concat, /*sq_concat*/
672 (intargfunc)string_repeat, /*sq_repeat*/
673 (intargfunc)string_item, /*sq_item*/
674 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000675 0, /*sq_ass_item*/
676 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000677 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000678};
679
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000680static PyBufferProcs string_as_buffer = {
681 (getreadbufferproc)string_buffer_getreadbuf,
682 (getwritebufferproc)string_buffer_getwritebuf,
683 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000684 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000685};
686
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000687
688
689#define LEFTSTRIP 0
690#define RIGHTSTRIP 1
691#define BOTHSTRIP 2
692
693
694static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000695split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000696{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000697 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000698 PyObject* item;
699 PyObject *list = PyList_New(0);
700
701 if (list == NULL)
702 return NULL;
703
Guido van Rossum4c08d552000-03-10 22:55:18 +0000704 for (i = j = 0; i < len; ) {
705 while (i < len && isspace(Py_CHARMASK(s[i])))
706 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000707 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000708 while (i < len && !isspace(Py_CHARMASK(s[i])))
709 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000710 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000711 if (maxsplit-- <= 0)
712 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000713 item = PyString_FromStringAndSize(s+j, (int)(i-j));
714 if (item == NULL)
715 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000716 err = PyList_Append(list, item);
717 Py_DECREF(item);
718 if (err < 0)
719 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000720 while (i < len && isspace(Py_CHARMASK(s[i])))
721 i++;
722 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000723 }
724 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000725 if (j < len) {
726 item = PyString_FromStringAndSize(s+j, (int)(len - j));
727 if (item == NULL)
728 goto finally;
729 err = PyList_Append(list, item);
730 Py_DECREF(item);
731 if (err < 0)
732 goto finally;
733 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000734 return list;
735 finally:
736 Py_DECREF(list);
737 return NULL;
738}
739
740
741static char split__doc__[] =
742"S.split([sep [,maxsplit]]) -> list of strings\n\
743\n\
744Return a list of the words in the string S, using sep as the\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000745delimiter string. If maxsplit is given, at most maxsplit\n\
746splits are done. If sep is not specified, any whitespace string\n\
747is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000748
749static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000750string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000751{
752 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000753 int maxsplit = -1;
754 const char *s = PyString_AS_STRING(self), *sub;
755 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000756
Guido van Rossum4c08d552000-03-10 22:55:18 +0000757 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000758 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000759 if (maxsplit < 0)
760 maxsplit = INT_MAX;
761 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000762 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000763 if (PyString_Check(subobj)) {
764 sub = PyString_AS_STRING(subobj);
765 n = PyString_GET_SIZE(subobj);
766 }
767 else if (PyUnicode_Check(subobj))
768 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
769 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
770 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000771 if (n == 0) {
772 PyErr_SetString(PyExc_ValueError, "empty separator");
773 return NULL;
774 }
775
776 list = PyList_New(0);
777 if (list == NULL)
778 return NULL;
779
780 i = j = 0;
781 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000782 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000783 if (maxsplit-- <= 0)
784 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000785 item = PyString_FromStringAndSize(s+j, (int)(i-j));
786 if (item == NULL)
787 goto fail;
788 err = PyList_Append(list, item);
789 Py_DECREF(item);
790 if (err < 0)
791 goto fail;
792 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000793 }
794 else
795 i++;
796 }
797 item = PyString_FromStringAndSize(s+j, (int)(len-j));
798 if (item == NULL)
799 goto fail;
800 err = PyList_Append(list, item);
801 Py_DECREF(item);
802 if (err < 0)
803 goto fail;
804
805 return list;
806
807 fail:
808 Py_DECREF(list);
809 return NULL;
810}
811
812
813static char join__doc__[] =
814"S.join(sequence) -> string\n\
815\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +0000816Return a string which is the concatenation of the strings in the\n\
817sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000818
819static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000820string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000821{
822 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000823 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000824 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000825 char *p;
826 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000827 size_t sz = 0;
828 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000829 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000830
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000831 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000832 return NULL;
833
Tim Peters19fe14e2001-01-19 03:03:47 +0000834 seq = PySequence_Fast(orig, "");
835 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000836 if (PyErr_ExceptionMatches(PyExc_TypeError))
837 PyErr_Format(PyExc_TypeError,
838 "sequence expected, %.80s found",
839 orig->ob_type->tp_name);
840 return NULL;
841 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000842
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000843 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000844 if (seqlen == 0) {
845 Py_DECREF(seq);
846 return PyString_FromString("");
847 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000848 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000849 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000850 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
851 PyErr_Format(PyExc_TypeError,
852 "sequence item 0: expected string,"
853 " %.80s found",
854 item->ob_type->tp_name);
855 Py_DECREF(seq);
856 return NULL;
857 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000858 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000859 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000860 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000861 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000862
Tim Peters19fe14e2001-01-19 03:03:47 +0000863 /* There are at least two things to join. Do a pre-pass to figure out
864 * the total amount of space we'll need (sz), see whether any argument
865 * is absurd, and defer to the Unicode join if appropriate.
866 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000867 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000868 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000869 item = PySequence_Fast_GET_ITEM(seq, i);
870 if (!PyString_Check(item)){
871 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000872 /* Defer to Unicode join.
873 * CAUTION: There's no gurantee that the
874 * original sequence can be iterated over
875 * again, so we must pass seq here.
876 */
877 PyObject *result;
878 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000879 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +0000880 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000881 }
882 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000883 "sequence item %i: expected string,"
884 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000885 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000886 Py_DECREF(seq);
887 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000888 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000889 sz += PyString_GET_SIZE(item);
890 if (i != 0)
891 sz += seplen;
892 if (sz < old_sz || sz > INT_MAX) {
893 PyErr_SetString(PyExc_OverflowError,
894 "join() is too long for a Python string");
895 Py_DECREF(seq);
896 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000897 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000898 }
899
900 /* Allocate result space. */
901 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
902 if (res == NULL) {
903 Py_DECREF(seq);
904 return NULL;
905 }
906
907 /* Catenate everything. */
908 p = PyString_AS_STRING(res);
909 for (i = 0; i < seqlen; ++i) {
910 size_t n;
911 item = PySequence_Fast_GET_ITEM(seq, i);
912 n = PyString_GET_SIZE(item);
913 memcpy(p, PyString_AS_STRING(item), n);
914 p += n;
915 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000916 memcpy(p, sep, seplen);
917 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000918 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000919 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000920
Jeremy Hylton49048292000-07-11 03:28:17 +0000921 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000922 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000923}
924
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000925static long
Fred Drakeba096332000-07-09 07:04:36 +0000926string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000927{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000928 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000929 int len = PyString_GET_SIZE(self);
930 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000931 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000932
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000933 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +0000934 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000935 return -2;
936 if (PyString_Check(subobj)) {
937 sub = PyString_AS_STRING(subobj);
938 n = PyString_GET_SIZE(subobj);
939 }
940 else if (PyUnicode_Check(subobj))
941 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
942 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000943 return -2;
944
945 if (last > len)
946 last = len;
947 if (last < 0)
948 last += len;
949 if (last < 0)
950 last = 0;
951 if (i < 0)
952 i += len;
953 if (i < 0)
954 i = 0;
955
Guido van Rossum4c08d552000-03-10 22:55:18 +0000956 if (dir > 0) {
957 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000958 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000959 last -= n;
960 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000961 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000962 return (long)i;
963 }
964 else {
965 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000966
Guido van Rossum4c08d552000-03-10 22:55:18 +0000967 if (n == 0 && i <= last)
968 return (long)last;
969 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000970 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000971 return (long)j;
972 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000973
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000974 return -1;
975}
976
977
978static char find__doc__[] =
979"S.find(sub [,start [,end]]) -> int\n\
980\n\
981Return the lowest index in S where substring sub is found,\n\
982such that sub is contained within s[start,end]. Optional\n\
983arguments start and end are interpreted as in slice notation.\n\
984\n\
985Return -1 on failure.";
986
987static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000988string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000989{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000990 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000991 if (result == -2)
992 return NULL;
993 return PyInt_FromLong(result);
994}
995
996
997static char index__doc__[] =
998"S.index(sub [,start [,end]]) -> int\n\
999\n\
1000Like S.find() but raise ValueError when the substring is not found.";
1001
1002static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001003string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001004{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001005 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001006 if (result == -2)
1007 return NULL;
1008 if (result == -1) {
1009 PyErr_SetString(PyExc_ValueError,
1010 "substring not found in string.index");
1011 return NULL;
1012 }
1013 return PyInt_FromLong(result);
1014}
1015
1016
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001017static char rfind__doc__[] =
1018"S.rfind(sub [,start [,end]]) -> int\n\
1019\n\
1020Return the highest index in S where substring sub is found,\n\
1021such that sub is contained within s[start,end]. Optional\n\
1022arguments start and end are interpreted as in slice notation.\n\
1023\n\
1024Return -1 on failure.";
1025
1026static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001027string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001028{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001029 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001030 if (result == -2)
1031 return NULL;
1032 return PyInt_FromLong(result);
1033}
1034
1035
1036static char rindex__doc__[] =
1037"S.rindex(sub [,start [,end]]) -> int\n\
1038\n\
1039Like S.rfind() but raise ValueError when the substring is not found.";
1040
1041static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001042string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001043{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001044 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001045 if (result == -2)
1046 return NULL;
1047 if (result == -1) {
1048 PyErr_SetString(PyExc_ValueError,
1049 "substring not found in string.rindex");
1050 return NULL;
1051 }
1052 return PyInt_FromLong(result);
1053}
1054
1055
1056static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001057do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001058{
1059 char *s = PyString_AS_STRING(self);
1060 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001061
Guido van Rossum43713e52000-02-29 13:59:29 +00001062 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001063 return NULL;
1064
1065 i = 0;
1066 if (striptype != RIGHTSTRIP) {
1067 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1068 i++;
1069 }
1070 }
1071
1072 j = len;
1073 if (striptype != LEFTSTRIP) {
1074 do {
1075 j--;
1076 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1077 j++;
1078 }
1079
1080 if (i == 0 && j == len) {
1081 Py_INCREF(self);
1082 return (PyObject*)self;
1083 }
1084 else
1085 return PyString_FromStringAndSize(s+i, j-i);
1086}
1087
1088
1089static char strip__doc__[] =
1090"S.strip() -> string\n\
1091\n\
1092Return a copy of the string S with leading and trailing\n\
1093whitespace removed.";
1094
1095static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001096string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001097{
1098 return do_strip(self, args, BOTHSTRIP);
1099}
1100
1101
1102static char lstrip__doc__[] =
1103"S.lstrip() -> string\n\
1104\n\
1105Return a copy of the string S with leading whitespace removed.";
1106
1107static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001108string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001109{
1110 return do_strip(self, args, LEFTSTRIP);
1111}
1112
1113
1114static char rstrip__doc__[] =
1115"S.rstrip() -> string\n\
1116\n\
1117Return a copy of the string S with trailing whitespace removed.";
1118
1119static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001120string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001121{
1122 return do_strip(self, args, RIGHTSTRIP);
1123}
1124
1125
1126static char lower__doc__[] =
1127"S.lower() -> string\n\
1128\n\
1129Return a copy of the string S converted to lowercase.";
1130
1131static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001132string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001133{
1134 char *s = PyString_AS_STRING(self), *s_new;
1135 int i, n = PyString_GET_SIZE(self);
1136 PyObject *new;
1137
Guido van Rossum43713e52000-02-29 13:59:29 +00001138 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001139 return NULL;
1140 new = PyString_FromStringAndSize(NULL, n);
1141 if (new == NULL)
1142 return NULL;
1143 s_new = PyString_AsString(new);
1144 for (i = 0; i < n; i++) {
1145 int c = Py_CHARMASK(*s++);
1146 if (isupper(c)) {
1147 *s_new = tolower(c);
1148 } else
1149 *s_new = c;
1150 s_new++;
1151 }
1152 return new;
1153}
1154
1155
1156static char upper__doc__[] =
1157"S.upper() -> string\n\
1158\n\
1159Return a copy of the string S converted to uppercase.";
1160
1161static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001162string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001163{
1164 char *s = PyString_AS_STRING(self), *s_new;
1165 int i, n = PyString_GET_SIZE(self);
1166 PyObject *new;
1167
Guido van Rossum43713e52000-02-29 13:59:29 +00001168 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001169 return NULL;
1170 new = PyString_FromStringAndSize(NULL, n);
1171 if (new == NULL)
1172 return NULL;
1173 s_new = PyString_AsString(new);
1174 for (i = 0; i < n; i++) {
1175 int c = Py_CHARMASK(*s++);
1176 if (islower(c)) {
1177 *s_new = toupper(c);
1178 } else
1179 *s_new = c;
1180 s_new++;
1181 }
1182 return new;
1183}
1184
1185
Guido van Rossum4c08d552000-03-10 22:55:18 +00001186static char title__doc__[] =
1187"S.title() -> string\n\
1188\n\
1189Return a titlecased version of S, i.e. words start with uppercase\n\
1190characters, all remaining cased characters have lowercase.";
1191
1192static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001193string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001194{
1195 char *s = PyString_AS_STRING(self), *s_new;
1196 int i, n = PyString_GET_SIZE(self);
1197 int previous_is_cased = 0;
1198 PyObject *new;
1199
1200 if (!PyArg_ParseTuple(args, ":title"))
1201 return NULL;
1202 new = PyString_FromStringAndSize(NULL, n);
1203 if (new == NULL)
1204 return NULL;
1205 s_new = PyString_AsString(new);
1206 for (i = 0; i < n; i++) {
1207 int c = Py_CHARMASK(*s++);
1208 if (islower(c)) {
1209 if (!previous_is_cased)
1210 c = toupper(c);
1211 previous_is_cased = 1;
1212 } else if (isupper(c)) {
1213 if (previous_is_cased)
1214 c = tolower(c);
1215 previous_is_cased = 1;
1216 } else
1217 previous_is_cased = 0;
1218 *s_new++ = c;
1219 }
1220 return new;
1221}
1222
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001223static char capitalize__doc__[] =
1224"S.capitalize() -> string\n\
1225\n\
1226Return a copy of the string S with only its first character\n\
1227capitalized.";
1228
1229static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001230string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001231{
1232 char *s = PyString_AS_STRING(self), *s_new;
1233 int i, n = PyString_GET_SIZE(self);
1234 PyObject *new;
1235
Guido van Rossum43713e52000-02-29 13:59:29 +00001236 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001237 return NULL;
1238 new = PyString_FromStringAndSize(NULL, n);
1239 if (new == NULL)
1240 return NULL;
1241 s_new = PyString_AsString(new);
1242 if (0 < n) {
1243 int c = Py_CHARMASK(*s++);
1244 if (islower(c))
1245 *s_new = toupper(c);
1246 else
1247 *s_new = c;
1248 s_new++;
1249 }
1250 for (i = 1; i < n; i++) {
1251 int c = Py_CHARMASK(*s++);
1252 if (isupper(c))
1253 *s_new = tolower(c);
1254 else
1255 *s_new = c;
1256 s_new++;
1257 }
1258 return new;
1259}
1260
1261
1262static char count__doc__[] =
1263"S.count(sub[, start[, end]]) -> int\n\
1264\n\
1265Return the number of occurrences of substring sub in string\n\
1266S[start:end]. Optional arguments start and end are\n\
1267interpreted as in slice notation.";
1268
1269static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001270string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001271{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001272 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001273 int len = PyString_GET_SIZE(self), n;
1274 int i = 0, last = INT_MAX;
1275 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001276 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001277
Guido van Rossumc6821402000-05-08 14:08:05 +00001278 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1279 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001280 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001281
Guido van Rossum4c08d552000-03-10 22:55:18 +00001282 if (PyString_Check(subobj)) {
1283 sub = PyString_AS_STRING(subobj);
1284 n = PyString_GET_SIZE(subobj);
1285 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001286 else if (PyUnicode_Check(subobj)) {
1287 int count;
1288 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1289 if (count == -1)
1290 return NULL;
1291 else
1292 return PyInt_FromLong((long) count);
1293 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001294 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1295 return NULL;
1296
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001297 if (last > len)
1298 last = len;
1299 if (last < 0)
1300 last += len;
1301 if (last < 0)
1302 last = 0;
1303 if (i < 0)
1304 i += len;
1305 if (i < 0)
1306 i = 0;
1307 m = last + 1 - n;
1308 if (n == 0)
1309 return PyInt_FromLong((long) (m-i));
1310
1311 r = 0;
1312 while (i < m) {
1313 if (!memcmp(s+i, sub, n)) {
1314 r++;
1315 i += n;
1316 } else {
1317 i++;
1318 }
1319 }
1320 return PyInt_FromLong((long) r);
1321}
1322
1323
1324static char swapcase__doc__[] =
1325"S.swapcase() -> string\n\
1326\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00001327Return a copy of the string S with uppercase characters\n\
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001328converted to lowercase and vice versa.";
1329
1330static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001331string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001332{
1333 char *s = PyString_AS_STRING(self), *s_new;
1334 int i, n = PyString_GET_SIZE(self);
1335 PyObject *new;
1336
Guido van Rossum43713e52000-02-29 13:59:29 +00001337 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001338 return NULL;
1339 new = PyString_FromStringAndSize(NULL, n);
1340 if (new == NULL)
1341 return NULL;
1342 s_new = PyString_AsString(new);
1343 for (i = 0; i < n; i++) {
1344 int c = Py_CHARMASK(*s++);
1345 if (islower(c)) {
1346 *s_new = toupper(c);
1347 }
1348 else if (isupper(c)) {
1349 *s_new = tolower(c);
1350 }
1351 else
1352 *s_new = c;
1353 s_new++;
1354 }
1355 return new;
1356}
1357
1358
1359static char translate__doc__[] =
1360"S.translate(table [,deletechars]) -> string\n\
1361\n\
1362Return a copy of the string S, where all characters occurring\n\
1363in the optional argument deletechars are removed, and the\n\
1364remaining characters have been mapped through the given\n\
1365translation table, which must be a string of length 256.";
1366
1367static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001368string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001369{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001370 register char *input, *output;
1371 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001372 register int i, c, changed = 0;
1373 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001374 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001375 int inlen, tablen, dellen = 0;
1376 PyObject *result;
1377 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001378 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001379
Guido van Rossum4c08d552000-03-10 22:55:18 +00001380 if (!PyArg_ParseTuple(args, "O|O:translate",
1381 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001382 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001383
1384 if (PyString_Check(tableobj)) {
1385 table1 = PyString_AS_STRING(tableobj);
1386 tablen = PyString_GET_SIZE(tableobj);
1387 }
1388 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001389 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001390 parameter; instead a mapping to None will cause characters
1391 to be deleted. */
1392 if (delobj != NULL) {
1393 PyErr_SetString(PyExc_TypeError,
1394 "deletions are implemented differently for unicode");
1395 return NULL;
1396 }
1397 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1398 }
1399 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001400 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001401
1402 if (delobj != NULL) {
1403 if (PyString_Check(delobj)) {
1404 del_table = PyString_AS_STRING(delobj);
1405 dellen = PyString_GET_SIZE(delobj);
1406 }
1407 else if (PyUnicode_Check(delobj)) {
1408 PyErr_SetString(PyExc_TypeError,
1409 "deletions are implemented differently for unicode");
1410 return NULL;
1411 }
1412 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1413 return NULL;
1414
1415 if (tablen != 256) {
1416 PyErr_SetString(PyExc_ValueError,
1417 "translation table must be 256 characters long");
1418 return NULL;
1419 }
1420 }
1421 else {
1422 del_table = NULL;
1423 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001424 }
1425
1426 table = table1;
1427 inlen = PyString_Size(input_obj);
1428 result = PyString_FromStringAndSize((char *)NULL, inlen);
1429 if (result == NULL)
1430 return NULL;
1431 output_start = output = PyString_AsString(result);
1432 input = PyString_AsString(input_obj);
1433
1434 if (dellen == 0) {
1435 /* If no deletions are required, use faster code */
1436 for (i = inlen; --i >= 0; ) {
1437 c = Py_CHARMASK(*input++);
1438 if (Py_CHARMASK((*output++ = table[c])) != c)
1439 changed = 1;
1440 }
1441 if (changed)
1442 return result;
1443 Py_DECREF(result);
1444 Py_INCREF(input_obj);
1445 return input_obj;
1446 }
1447
1448 for (i = 0; i < 256; i++)
1449 trans_table[i] = Py_CHARMASK(table[i]);
1450
1451 for (i = 0; i < dellen; i++)
1452 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1453
1454 for (i = inlen; --i >= 0; ) {
1455 c = Py_CHARMASK(*input++);
1456 if (trans_table[c] != -1)
1457 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1458 continue;
1459 changed = 1;
1460 }
1461 if (!changed) {
1462 Py_DECREF(result);
1463 Py_INCREF(input_obj);
1464 return input_obj;
1465 }
1466 /* Fix the size of the resulting string */
1467 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1468 return NULL;
1469 return result;
1470}
1471
1472
1473/* What follows is used for implementing replace(). Perry Stoll. */
1474
/*
  mymemfind

  strstr-like search over arbitrary memory blocks.

  Looks for the first occurrence of the PAT_LEN bytes at PAT inside the
  LEN bytes at MEM.  Returns the offset of the match within MEM, or -1
  when there is none.  A pattern longer than MEM never matches, so -1
  is returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot start in the last pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
1500
/*
  mymemcnt

  Count the non-overlapping occurrences of PAT in MEM, scanning left
  to right and resuming after the end of each match:
      mem=1111  and pat=11 gives 2,
      mem=11111 and pat=11 also gives 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int matches = 0;
    int where;
    int skip;

    for (; len >= 0; matches++) {
        where = mymemfind(mem, len, pat, pat_len);
        if (where == -1)
            break;
        /* Continue right behind the match just found. */
        skip = where + pat_len;
        mem += skip;
        len -= skip;
    }
    return matches;
}
1524
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means "all of them".

   If STR is empty, if PAT is longer than STR, or if there are no
   occurrences of PAT to replace, the original string is returned.
   Otherwise, a new buffer is allocated here with PyMem_MALLOC and
   returned.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would be
       longer than an int can express).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,          /* input string */
             const char *pat, int pat_len,      /* pattern string to find */
             const char *sub, int sub_len,      /* substitution string */
             int count,                         /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;
    int delta;          /* size change caused by one replacement */

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    delta = sub_len - pat_len;
    /* Growing result: refuse sizes that do not fit in an int rather
       than letting the multiplication overflow and under-allocate.
       Handled like an allocation failure. */
    if (delta > 0 && delta > (INT_MAX - len) / nfound)
        return NULL;

    new_len = len + nfound*delta;
    if (new_len == 0) {
        /* Have to allocate something for the caller to free(). */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
    }
    else {
        assert(new_len > 0);
        new_s = (char *)PyMem_MALLOC(new_len);
        if (new_s == NULL)
            return NULL;
        out_s = new_s;

        for (; count > 0 && len > 0; --count) {
            /* find index of next instance of pattern */
            offset = mymemfind(str, len, pat, pat_len);
            if (offset == -1)
                break;

            /* copy non matching part of input string */
            memcpy(new_s, str, offset);
            str += offset + pat_len;
            len -= offset + pat_len;

            /* copy substitute into the output string */
            new_s += offset;
            memcpy(new_s, sub, sub_len);
            new_s += sub_len;
        }
        /* copy any remaining values into output string */
        if (len > 0)
            memcpy(new_s, str, len);
    }
    *out_len = new_len;
    return out_s;

  return_same:
    *out_len = -1;
    return (char *)str; /* cast away const */
}
1609
1610
1611static char replace__doc__[] =
1612"S.replace (old, new[, maxsplit]) -> string\n\
1613\n\
1614Return a copy of string S with all occurrences of substring\n\
1615old replaced by new. If the optional argument maxsplit is\n\
1616given, only the first maxsplit occurrences are replaced.";
1617
1618static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001619string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001620{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001621 const char *str = PyString_AS_STRING(self), *sub, *repl;
1622 char *new_s;
1623 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1624 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001625 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001626 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001627
Guido van Rossum4c08d552000-03-10 22:55:18 +00001628 if (!PyArg_ParseTuple(args, "OO|i:replace",
1629 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001630 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001631
1632 if (PyString_Check(subobj)) {
1633 sub = PyString_AS_STRING(subobj);
1634 sub_len = PyString_GET_SIZE(subobj);
1635 }
1636 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001637 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001638 subobj, replobj, count);
1639 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1640 return NULL;
1641
1642 if (PyString_Check(replobj)) {
1643 repl = PyString_AS_STRING(replobj);
1644 repl_len = PyString_GET_SIZE(replobj);
1645 }
1646 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001647 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001648 subobj, replobj, count);
1649 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1650 return NULL;
1651
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001652 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001653 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654 return NULL;
1655 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001656 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001657 if (new_s == NULL) {
1658 PyErr_NoMemory();
1659 return NULL;
1660 }
1661 if (out_len == -1) {
1662 /* we're returning another reference to self */
1663 new = (PyObject*)self;
1664 Py_INCREF(new);
1665 }
1666 else {
1667 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001668 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001669 }
1670 return new;
1671}
1672
1673
1674static char startswith__doc__[] =
1675"S.startswith(prefix[, start[, end]]) -> int\n\
1676\n\
1677Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1678optional start, test S beginning at that position. With optional end, stop\n\
1679comparing S at that position.";
1680
1681static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001682string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001683{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001684 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001685 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001686 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001687 int plen;
1688 int start = 0;
1689 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001690 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001691
Guido van Rossumc6821402000-05-08 14:08:05 +00001692 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1693 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001694 return NULL;
1695 if (PyString_Check(subobj)) {
1696 prefix = PyString_AS_STRING(subobj);
1697 plen = PyString_GET_SIZE(subobj);
1698 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001699 else if (PyUnicode_Check(subobj)) {
1700 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001701 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001702 subobj, start, end, -1);
1703 if (rc == -1)
1704 return NULL;
1705 else
1706 return PyInt_FromLong((long) rc);
1707 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001708 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001709 return NULL;
1710
1711 /* adopt Java semantics for index out of range. it is legal for
1712 * offset to be == plen, but this only returns true if prefix is
1713 * the empty string.
1714 */
1715 if (start < 0 || start+plen > len)
1716 return PyInt_FromLong(0);
1717
1718 if (!memcmp(str+start, prefix, plen)) {
1719 /* did the match end after the specified end? */
1720 if (end < 0)
1721 return PyInt_FromLong(1);
1722 else if (end - start < plen)
1723 return PyInt_FromLong(0);
1724 else
1725 return PyInt_FromLong(1);
1726 }
1727 else return PyInt_FromLong(0);
1728}
1729
1730
1731static char endswith__doc__[] =
1732"S.endswith(suffix[, start[, end]]) -> int\n\
1733\n\
1734Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1735optional start, test S beginning at that position. With optional end, stop\n\
1736comparing S at that position.";
1737
1738static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001739string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001740{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001741 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001742 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001743 const char* suffix;
1744 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001745 int start = 0;
1746 int end = -1;
1747 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001748 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001749
Guido van Rossumc6821402000-05-08 14:08:05 +00001750 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1751 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001752 return NULL;
1753 if (PyString_Check(subobj)) {
1754 suffix = PyString_AS_STRING(subobj);
1755 slen = PyString_GET_SIZE(subobj);
1756 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001757 else if (PyUnicode_Check(subobj)) {
1758 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001759 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001760 subobj, start, end, +1);
1761 if (rc == -1)
1762 return NULL;
1763 else
1764 return PyInt_FromLong((long) rc);
1765 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001766 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001767 return NULL;
1768
Guido van Rossum4c08d552000-03-10 22:55:18 +00001769 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001770 return PyInt_FromLong(0);
1771
1772 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001773 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001774
Guido van Rossum4c08d552000-03-10 22:55:18 +00001775 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001776 return PyInt_FromLong(1);
1777 else return PyInt_FromLong(0);
1778}
1779
1780
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001781static char encode__doc__[] =
1782"S.encode([encoding[,errors]]) -> string\n\
1783\n\
1784Return an encoded string version of S. Default encoding is the current\n\
1785default string encoding. errors may be given to set a different error\n\
1786handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1787a ValueError. Other possible values are 'ignore' and 'replace'.";
1788
1789static PyObject *
1790string_encode(PyStringObject *self, PyObject *args)
1791{
1792 char *encoding = NULL;
1793 char *errors = NULL;
1794 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1795 return NULL;
1796 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1797}
1798
1799
Guido van Rossum4c08d552000-03-10 22:55:18 +00001800static char expandtabs__doc__[] =
1801"S.expandtabs([tabsize]) -> string\n\
1802\n\
1803Return a copy of S where all tab characters are expanded using spaces.\n\
1804If tabsize is not given, a tab size of 8 characters is assumed.";
1805
1806static PyObject*
1807string_expandtabs(PyStringObject *self, PyObject *args)
1808{
1809 const char *e, *p;
1810 char *q;
1811 int i, j;
1812 PyObject *u;
1813 int tabsize = 8;
1814
1815 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1816 return NULL;
1817
Thomas Wouters7e474022000-07-16 12:04:32 +00001818 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001819 i = j = 0;
1820 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1821 for (p = PyString_AS_STRING(self); p < e; p++)
1822 if (*p == '\t') {
1823 if (tabsize > 0)
1824 j += tabsize - (j % tabsize);
1825 }
1826 else {
1827 j++;
1828 if (*p == '\n' || *p == '\r') {
1829 i += j;
1830 j = 0;
1831 }
1832 }
1833
1834 /* Second pass: create output string and fill it */
1835 u = PyString_FromStringAndSize(NULL, i + j);
1836 if (!u)
1837 return NULL;
1838
1839 j = 0;
1840 q = PyString_AS_STRING(u);
1841
1842 for (p = PyString_AS_STRING(self); p < e; p++)
1843 if (*p == '\t') {
1844 if (tabsize > 0) {
1845 i = tabsize - (j % tabsize);
1846 j += i;
1847 while (i--)
1848 *q++ = ' ';
1849 }
1850 }
1851 else {
1852 j++;
1853 *q++ = *p;
1854 if (*p == '\n' || *p == '\r')
1855 j = 0;
1856 }
1857
1858 return u;
1859}
1860
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001861static
1862PyObject *pad(PyStringObject *self,
1863 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001864 int right,
1865 char fill)
1866{
1867 PyObject *u;
1868
1869 if (left < 0)
1870 left = 0;
1871 if (right < 0)
1872 right = 0;
1873
1874 if (left == 0 && right == 0) {
1875 Py_INCREF(self);
1876 return (PyObject *)self;
1877 }
1878
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001879 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001880 left + PyString_GET_SIZE(self) + right);
1881 if (u) {
1882 if (left)
1883 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001884 memcpy(PyString_AS_STRING(u) + left,
1885 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00001886 PyString_GET_SIZE(self));
1887 if (right)
1888 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1889 fill, right);
1890 }
1891
1892 return u;
1893}
1894
1895static char ljust__doc__[] =
1896"S.ljust(width) -> string\n\
1897\n\
1898Return S left justified in a string of length width. Padding is\n\
1899done using spaces.";
1900
1901static PyObject *
1902string_ljust(PyStringObject *self, PyObject *args)
1903{
1904 int width;
1905 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1906 return NULL;
1907
1908 if (PyString_GET_SIZE(self) >= width) {
1909 Py_INCREF(self);
1910 return (PyObject*) self;
1911 }
1912
1913 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1914}
1915
1916
1917static char rjust__doc__[] =
1918"S.rjust(width) -> string\n\
1919\n\
1920Return S right justified in a string of length width. Padding is\n\
1921done using spaces.";
1922
1923static PyObject *
1924string_rjust(PyStringObject *self, PyObject *args)
1925{
1926 int width;
1927 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1928 return NULL;
1929
1930 if (PyString_GET_SIZE(self) >= width) {
1931 Py_INCREF(self);
1932 return (PyObject*) self;
1933 }
1934
1935 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1936}
1937
1938
1939static char center__doc__[] =
1940"S.center(width) -> string\n\
1941\n\
1942Return S centered in a string of length width. Padding is done\n\
1943using spaces.";
1944
1945static PyObject *
1946string_center(PyStringObject *self, PyObject *args)
1947{
1948 int marg, left;
1949 int width;
1950
1951 if (!PyArg_ParseTuple(args, "i:center", &width))
1952 return NULL;
1953
1954 if (PyString_GET_SIZE(self) >= width) {
1955 Py_INCREF(self);
1956 return (PyObject*) self;
1957 }
1958
1959 marg = width - PyString_GET_SIZE(self);
1960 left = marg / 2 + (marg & width & 1);
1961
1962 return pad(self, left, marg - left, ' ');
1963}
1964
#if 0
/* NOTE: this method is compiled out. */
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* Implementation of S.zfill(width): left-pad with '0' characters and,
 * if the original text began with '+' or '-', move that sign to the
 * very front of the result.
 */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int nzeros;
	PyObject *padded;
	char *buf;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (PyString_GET_SIZE(self) >= width) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	nzeros = width - PyString_GET_SIZE(self);

	padded = pad(self, nzeros, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[nzeros] is the first character of the original text. */
	buf = PyString_AS_STRING(padded);
	if (buf[nzeros] == '+' || buf[nzeros] == '-') {
		/* move sign to beginning of string */
		buf[0] = buf[nzeros];
		buf[nzeros] = '0';
	}

	return padded;
}
#endif
2004
2005static char isspace__doc__[] =
2006"S.isspace() -> int\n\
2007\n\
2008Return 1 if there are only whitespace characters in S,\n\
20090 otherwise.";
2010
2011static PyObject*
2012string_isspace(PyStringObject *self, PyObject *args)
2013{
Fred Drakeba096332000-07-09 07:04:36 +00002014 register const unsigned char *p
2015 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002016 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002017
2018 if (!PyArg_NoArgs(args))
2019 return NULL;
2020
2021 /* Shortcut for single character strings */
2022 if (PyString_GET_SIZE(self) == 1 &&
2023 isspace(*p))
2024 return PyInt_FromLong(1);
2025
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002026 /* Special case for empty strings */
2027 if (PyString_GET_SIZE(self) == 0)
2028 return PyInt_FromLong(0);
2029
Guido van Rossum4c08d552000-03-10 22:55:18 +00002030 e = p + PyString_GET_SIZE(self);
2031 for (; p < e; p++) {
2032 if (!isspace(*p))
2033 return PyInt_FromLong(0);
2034 }
2035 return PyInt_FromLong(1);
2036}
2037
2038
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002039static char isalpha__doc__[] =
2040"S.isalpha() -> int\n\
2041\n\
2042Return 1 if all characters in S are alphabetic\n\
2043and there is at least one character in S, 0 otherwise.";
2044
2045static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002046string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002047{
Fred Drakeba096332000-07-09 07:04:36 +00002048 register const unsigned char *p
2049 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002050 register const unsigned char *e;
2051
2052 if (!PyArg_NoArgs(args))
2053 return NULL;
2054
2055 /* Shortcut for single character strings */
2056 if (PyString_GET_SIZE(self) == 1 &&
2057 isalpha(*p))
2058 return PyInt_FromLong(1);
2059
2060 /* Special case for empty strings */
2061 if (PyString_GET_SIZE(self) == 0)
2062 return PyInt_FromLong(0);
2063
2064 e = p + PyString_GET_SIZE(self);
2065 for (; p < e; p++) {
2066 if (!isalpha(*p))
2067 return PyInt_FromLong(0);
2068 }
2069 return PyInt_FromLong(1);
2070}
2071
2072
2073static char isalnum__doc__[] =
2074"S.isalnum() -> int\n\
2075\n\
2076Return 1 if all characters in S are alphanumeric\n\
2077and there is at least one character in S, 0 otherwise.";
2078
2079static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002080string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002081{
Fred Drakeba096332000-07-09 07:04:36 +00002082 register const unsigned char *p
2083 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002084 register const unsigned char *e;
2085
2086 if (!PyArg_NoArgs(args))
2087 return NULL;
2088
2089 /* Shortcut for single character strings */
2090 if (PyString_GET_SIZE(self) == 1 &&
2091 isalnum(*p))
2092 return PyInt_FromLong(1);
2093
2094 /* Special case for empty strings */
2095 if (PyString_GET_SIZE(self) == 0)
2096 return PyInt_FromLong(0);
2097
2098 e = p + PyString_GET_SIZE(self);
2099 for (; p < e; p++) {
2100 if (!isalnum(*p))
2101 return PyInt_FromLong(0);
2102 }
2103 return PyInt_FromLong(1);
2104}
2105
2106
Guido van Rossum4c08d552000-03-10 22:55:18 +00002107static char isdigit__doc__[] =
2108"S.isdigit() -> int\n\
2109\n\
2110Return 1 if there are only digit characters in S,\n\
21110 otherwise.";
2112
2113static PyObject*
2114string_isdigit(PyStringObject *self, PyObject *args)
2115{
Fred Drakeba096332000-07-09 07:04:36 +00002116 register const unsigned char *p
2117 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002118 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002119
2120 if (!PyArg_NoArgs(args))
2121 return NULL;
2122
2123 /* Shortcut for single character strings */
2124 if (PyString_GET_SIZE(self) == 1 &&
2125 isdigit(*p))
2126 return PyInt_FromLong(1);
2127
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002128 /* Special case for empty strings */
2129 if (PyString_GET_SIZE(self) == 0)
2130 return PyInt_FromLong(0);
2131
Guido van Rossum4c08d552000-03-10 22:55:18 +00002132 e = p + PyString_GET_SIZE(self);
2133 for (; p < e; p++) {
2134 if (!isdigit(*p))
2135 return PyInt_FromLong(0);
2136 }
2137 return PyInt_FromLong(1);
2138}
2139
2140
2141static char islower__doc__[] =
2142"S.islower() -> int\n\
2143\n\
2144Return 1 if all cased characters in S are lowercase and there is\n\
2145at least one cased character in S, 0 otherwise.";
2146
2147static PyObject*
2148string_islower(PyStringObject *self, PyObject *args)
2149{
Fred Drakeba096332000-07-09 07:04:36 +00002150 register const unsigned char *p
2151 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002152 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002153 int cased;
2154
2155 if (!PyArg_NoArgs(args))
2156 return NULL;
2157
2158 /* Shortcut for single character strings */
2159 if (PyString_GET_SIZE(self) == 1)
2160 return PyInt_FromLong(islower(*p) != 0);
2161
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002162 /* Special case for empty strings */
2163 if (PyString_GET_SIZE(self) == 0)
2164 return PyInt_FromLong(0);
2165
Guido van Rossum4c08d552000-03-10 22:55:18 +00002166 e = p + PyString_GET_SIZE(self);
2167 cased = 0;
2168 for (; p < e; p++) {
2169 if (isupper(*p))
2170 return PyInt_FromLong(0);
2171 else if (!cased && islower(*p))
2172 cased = 1;
2173 }
2174 return PyInt_FromLong(cased);
2175}
2176
2177
2178static char isupper__doc__[] =
2179"S.isupper() -> int\n\
2180\n\
2181Return 1 if all cased characters in S are uppercase and there is\n\
2182at least one cased character in S, 0 otherwise.";
2183
2184static PyObject*
2185string_isupper(PyStringObject *self, PyObject *args)
2186{
Fred Drakeba096332000-07-09 07:04:36 +00002187 register const unsigned char *p
2188 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002189 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002190 int cased;
2191
2192 if (!PyArg_NoArgs(args))
2193 return NULL;
2194
2195 /* Shortcut for single character strings */
2196 if (PyString_GET_SIZE(self) == 1)
2197 return PyInt_FromLong(isupper(*p) != 0);
2198
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002199 /* Special case for empty strings */
2200 if (PyString_GET_SIZE(self) == 0)
2201 return PyInt_FromLong(0);
2202
Guido van Rossum4c08d552000-03-10 22:55:18 +00002203 e = p + PyString_GET_SIZE(self);
2204 cased = 0;
2205 for (; p < e; p++) {
2206 if (islower(*p))
2207 return PyInt_FromLong(0);
2208 else if (!cased && isupper(*p))
2209 cased = 1;
2210 }
2211 return PyInt_FromLong(cased);
2212}
2213
2214
2215static char istitle__doc__[] =
2216"S.istitle() -> int\n\
2217\n\
2218Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2219may only follow uncased characters and lowercase characters only cased\n\
2220ones. Return 0 otherwise.";
2221
2222static PyObject*
2223string_istitle(PyStringObject *self, PyObject *args)
2224{
Fred Drakeba096332000-07-09 07:04:36 +00002225 register const unsigned char *p
2226 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002227 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002228 int cased, previous_is_cased;
2229
2230 if (!PyArg_NoArgs(args))
2231 return NULL;
2232
2233 /* Shortcut for single character strings */
2234 if (PyString_GET_SIZE(self) == 1)
2235 return PyInt_FromLong(isupper(*p) != 0);
2236
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002237 /* Special case for empty strings */
2238 if (PyString_GET_SIZE(self) == 0)
2239 return PyInt_FromLong(0);
2240
Guido van Rossum4c08d552000-03-10 22:55:18 +00002241 e = p + PyString_GET_SIZE(self);
2242 cased = 0;
2243 previous_is_cased = 0;
2244 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002245 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002246
2247 if (isupper(ch)) {
2248 if (previous_is_cased)
2249 return PyInt_FromLong(0);
2250 previous_is_cased = 1;
2251 cased = 1;
2252 }
2253 else if (islower(ch)) {
2254 if (!previous_is_cased)
2255 return PyInt_FromLong(0);
2256 previous_is_cased = 1;
2257 cased = 1;
2258 }
2259 else
2260 previous_is_cased = 0;
2261 }
2262 return PyInt_FromLong(cased);
2263}
2264
2265
2266static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002267"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002268\n\
2269Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002270Line breaks are not included in the resulting list unless keepends\n\
2271is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272
2273#define SPLIT_APPEND(data, left, right) \
2274 str = PyString_FromStringAndSize(data + left, right - left); \
2275 if (!str) \
2276 goto onError; \
2277 if (PyList_Append(list, str)) { \
2278 Py_DECREF(str); \
2279 goto onError; \
2280 } \
2281 else \
2282 Py_DECREF(str);
2283
2284static PyObject*
2285string_splitlines(PyStringObject *self, PyObject *args)
2286{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002287 register int i;
2288 register int j;
2289 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002290 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002291 PyObject *list;
2292 PyObject *str;
2293 char *data;
2294
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002295 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296 return NULL;
2297
2298 data = PyString_AS_STRING(self);
2299 len = PyString_GET_SIZE(self);
2300
Guido van Rossum4c08d552000-03-10 22:55:18 +00002301 list = PyList_New(0);
2302 if (!list)
2303 goto onError;
2304
2305 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002306 int eol;
2307
Guido van Rossum4c08d552000-03-10 22:55:18 +00002308 /* Find a line and append it */
2309 while (i < len && data[i] != '\n' && data[i] != '\r')
2310 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002311
2312 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002313 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002314 if (i < len) {
2315 if (data[i] == '\r' && i + 1 < len &&
2316 data[i+1] == '\n')
2317 i += 2;
2318 else
2319 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002320 if (keepends)
2321 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002322 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002323 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002324 j = i;
2325 }
2326 if (j < len) {
2327 SPLIT_APPEND(data, j, len);
2328 }
2329
2330 return list;
2331
2332 onError:
2333 Py_DECREF(list);
2334 return NULL;
2335}
2336
2337#undef SPLIT_APPEND
2338
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002339
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002340static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002341string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002342 /* Counterparts of the obsolete stropmodule functions; except
2343 string.maketrans(). */
2344 {"join", (PyCFunction)string_join, 1, join__doc__},
2345 {"split", (PyCFunction)string_split, 1, split__doc__},
2346 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2347 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2348 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2349 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2350 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2351 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2352 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002353 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2354 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002355 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2356 {"count", (PyCFunction)string_count, 1, count__doc__},
2357 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2358 {"find", (PyCFunction)string_find, 1, find__doc__},
2359 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002360 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002361 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2362 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2363 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2364 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002365 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2366 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2367 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002368 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2369 {"title", (PyCFunction)string_title, 1, title__doc__},
2370 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2371 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2372 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002373 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002374 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2375 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2376#if 0
2377 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2378#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002379 {NULL, NULL} /* sentinel */
2380};
2381
2382static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002383string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002384{
2385 return Py_FindMethod(string_methods, (PyObject*)s, name);
2386}
2387
2388
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002389PyTypeObject PyString_Type = {
2390 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002391 0,
2392 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002393 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002394 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002395 (destructor)string_dealloc, /*tp_dealloc*/
2396 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002397 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002398 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002399 (cmpfunc)string_compare, /*tp_compare*/
2400 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002401 0, /*tp_as_number*/
2402 &string_as_sequence, /*tp_as_sequence*/
2403 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002404 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002405 0, /*tp_call*/
Guido van Rossum189f1df2001-05-01 16:51:53 +00002406 (reprfunc)string_str, /*tp_str*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002407 0, /*tp_getattro*/
2408 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002409 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002410 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002411 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002412};
2413
2414void
Fred Drakeba096332000-07-09 07:04:36 +00002415PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002416{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002417 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002418 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002419 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002420 if (w == NULL || !PyString_Check(*pv)) {
2421 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002422 *pv = NULL;
2423 return;
2424 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002425 v = string_concat((PyStringObject *) *pv, w);
2426 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002427 *pv = v;
2428}
2429
Guido van Rossum013142a1994-08-30 08:19:36 +00002430void
Fred Drakeba096332000-07-09 07:04:36 +00002431PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002432{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002433 PyString_Concat(pv, w);
2434 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002435}
2436
2437
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002438/* The following function breaks the notion that strings are immutable:
2439 it changes the size of a string. We get away with this only if there
2440 is only one module referencing the object. You can also think of it
2441 as creating a new string object and destroying the old one, only
2442 more efficiently. In any case, don't use this if the string may
2443 already be known to some other part of the code... */
2444
2445int
Fred Drakeba096332000-07-09 07:04:36 +00002446_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002447{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002448 register PyObject *v;
2449 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002450 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002451 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002452 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002453 Py_DECREF(v);
2454 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002455 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002456 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002457 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002458#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002459 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002460#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002461 _Py_ForgetReference(v);
2462 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002463 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002464 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002465 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002466 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002467 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002468 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002469 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002470 _Py_NewReference(*pv);
2471 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002472 sv->ob_size = newsize;
2473 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002474 return 0;
2475}
Guido van Rossume5372401993-03-16 12:15:04 +00002476
2477/* Helpers for formatstring */
2478
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002479static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002480getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002481{
2482 int argidx = *p_argidx;
2483 if (argidx < arglen) {
2484 (*p_argidx)++;
2485 if (arglen < 0)
2486 return args;
2487 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002488 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002489 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002490 PyErr_SetString(PyExc_TypeError,
2491 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002492 return NULL;
2493}
2494
Tim Peters38fd5b62000-09-21 05:43:11 +00002495/* Format codes
2496 * F_LJUST '-'
2497 * F_SIGN '+'
2498 * F_BLANK ' '
2499 * F_ALT '#'
2500 * F_ZERO '0'
2501 */
Guido van Rossume5372401993-03-16 12:15:04 +00002502#define F_LJUST (1<<0)
2503#define F_SIGN (1<<1)
2504#define F_BLANK (1<<2)
2505#define F_ALT (1<<3)
2506#define F_ZERO (1<<4)
2507
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002508static int
Fred Drakeba096332000-07-09 07:04:36 +00002509formatfloat(char *buf, size_t buflen, int flags,
2510 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002511{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002512 /* fmt = '%#.' + `prec` + `type`
2513 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002514 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002515 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002516 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002517 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002518 if (prec < 0)
2519 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002520 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2521 type = 'g';
2522 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002523 /* worst case length calc to ensure no buffer overrun:
2524 fmt = %#.<prec>g
2525 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002526 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002527 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2528 If prec=0 the effective precision is 1 (the leading digit is
2529 always given), therefore increase by one to 10+prec. */
2530 if (buflen <= (size_t)10 + (size_t)prec) {
2531 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002532 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002533 return -1;
2534 }
Guido van Rossume5372401993-03-16 12:15:04 +00002535 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002536 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002537}
2538
Tim Peters38fd5b62000-09-21 05:43:11 +00002539/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2540 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2541 * Python's regular ints.
2542 * Return value: a new PyString*, or NULL if error.
2543 * . *pbuf is set to point into it,
2544 * *plen set to the # of chars following that.
2545 * Caller must decref it when done using pbuf.
2546 * The string starting at *pbuf is of the form
2547 * "-"? ("0x" | "0X")? digit+
2548 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002549 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002550 * There will be at least prec digits, zero-filled on the left if
2551 * necessary to get that many.
2552 * val object to be converted
2553 * flags bitmask of format flags; only F_ALT is looked at
2554 * prec minimum number of digits; 0-fill on left if needed
2555 * type a character in [duoxX]; u acts the same as d
2556 *
2557 * CAUTION: o, x and X conversions on regular ints can never
2558 * produce a '-' sign, but can for Python's unbounded ints.
2559 */
2560PyObject*
2561_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2562 char **pbuf, int *plen)
2563{
2564 PyObject *result = NULL;
2565 char *buf;
2566 int i;
2567 int sign; /* 1 if '-', else 0 */
2568 int len; /* number of characters */
2569 int numdigits; /* len == numnondigits + numdigits */
2570 int numnondigits = 0;
2571
2572 switch (type) {
2573 case 'd':
2574 case 'u':
2575 result = val->ob_type->tp_str(val);
2576 break;
2577 case 'o':
2578 result = val->ob_type->tp_as_number->nb_oct(val);
2579 break;
2580 case 'x':
2581 case 'X':
2582 numnondigits = 2;
2583 result = val->ob_type->tp_as_number->nb_hex(val);
2584 break;
2585 default:
2586 assert(!"'type' not in [duoxX]");
2587 }
2588 if (!result)
2589 return NULL;
2590
2591 /* To modify the string in-place, there can only be one reference. */
2592 if (result->ob_refcnt != 1) {
2593 PyErr_BadInternalCall();
2594 return NULL;
2595 }
2596 buf = PyString_AsString(result);
2597 len = PyString_Size(result);
2598 if (buf[len-1] == 'L') {
2599 --len;
2600 buf[len] = '\0';
2601 }
2602 sign = buf[0] == '-';
2603 numnondigits += sign;
2604 numdigits = len - numnondigits;
2605 assert(numdigits > 0);
2606
Tim Petersfff53252001-04-12 18:38:48 +00002607 /* Get rid of base marker unless F_ALT */
2608 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002609 /* Need to skip 0x, 0X or 0. */
2610 int skipped = 0;
2611 switch (type) {
2612 case 'o':
2613 assert(buf[sign] == '0');
2614 /* If 0 is only digit, leave it alone. */
2615 if (numdigits > 1) {
2616 skipped = 1;
2617 --numdigits;
2618 }
2619 break;
2620 case 'x':
2621 case 'X':
2622 assert(buf[sign] == '0');
2623 assert(buf[sign + 1] == 'x');
2624 skipped = 2;
2625 numnondigits -= 2;
2626 break;
2627 }
2628 if (skipped) {
2629 buf += skipped;
2630 len -= skipped;
2631 if (sign)
2632 buf[0] = '-';
2633 }
2634 assert(len == numnondigits + numdigits);
2635 assert(numdigits > 0);
2636 }
2637
2638 /* Fill with leading zeroes to meet minimum width. */
2639 if (prec > numdigits) {
2640 PyObject *r1 = PyString_FromStringAndSize(NULL,
2641 numnondigits + prec);
2642 char *b1;
2643 if (!r1) {
2644 Py_DECREF(result);
2645 return NULL;
2646 }
2647 b1 = PyString_AS_STRING(r1);
2648 for (i = 0; i < numnondigits; ++i)
2649 *b1++ = *buf++;
2650 for (i = 0; i < prec - numdigits; i++)
2651 *b1++ = '0';
2652 for (i = 0; i < numdigits; i++)
2653 *b1++ = *buf++;
2654 *b1 = '\0';
2655 Py_DECREF(result);
2656 result = r1;
2657 buf = PyString_AS_STRING(result);
2658 len = numnondigits + prec;
2659 }
2660
2661 /* Fix up case for hex conversions. */
2662 switch (type) {
2663 case 'x':
2664 /* Need to convert all upper case letters to lower case. */
2665 for (i = 0; i < len; i++)
2666 if (buf[i] >= 'A' && buf[i] <= 'F')
2667 buf[i] += 'a'-'A';
2668 break;
2669 case 'X':
2670 /* Need to convert 0x to 0X (and -0x to -0X). */
2671 if (buf[sign + 1] == 'x')
2672 buf[sign + 1] = 'X';
2673 break;
2674 }
2675 *pbuf = buf;
2676 *plen = len;
2677 return result;
2678}
2679
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002680static int
Fred Drakeba096332000-07-09 07:04:36 +00002681formatint(char *buf, size_t buflen, int flags,
2682 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002683{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002684 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002685 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2686 + 1 + 1 = 24 */
2687 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002688 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002689 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002690 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002691 if (prec < 0)
2692 prec = 1;
2693 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002694 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002695 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002696 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002697 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002698 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002699 return -1;
2700 }
Guido van Rossume5372401993-03-16 12:15:04 +00002701 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002702 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2703 * but we want it (for consistency with other %#x conversions, and
2704 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002705 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2706 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2707 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002708 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002709 if (x == 0 &&
2710 (flags & F_ALT) &&
2711 (type == 'x' || type == 'X') &&
2712 buf[1] != (char)type) /* this last always true under std C */
2713 {
Tim Petersfff53252001-04-12 18:38:48 +00002714 memmove(buf+2, buf, strlen(buf) + 1);
2715 buf[0] = '0';
2716 buf[1] = (char)type;
2717 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002718 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002719}
2720
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002721static int
Fred Drakeba096332000-07-09 07:04:36 +00002722formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002723{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002724 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002725 if (PyString_Check(v)) {
2726 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002727 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002728 }
2729 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002730 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002731 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002732 }
2733 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002734 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002735}
2736
Guido van Rossum013142a1994-08-30 08:19:36 +00002737
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002738/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
2739
2740 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
2741 chars are formatted. XXX This is a magic number. Each formatting
2742 routine does bounds checking to ensure no overflow, but a better
2743 solution may be to malloc a buffer of appropriate size for each
2744 format. For now, the current solution is sufficient.
2745*/
2746#define FORMATBUFLEN (size_t)120
Guido van Rossume5372401993-03-16 12:15:04 +00002747
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002748PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002749PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002750{
2751 char *fmt, *res;
2752 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002753 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002754 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002755 PyObject *dict = NULL;
2756 if (format == NULL || !PyString_Check(format) || args == NULL) {
2757 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002758 return NULL;
2759 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002760 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002761 fmt = PyString_AsString(format);
2762 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002763 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002764 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002765 if (result == NULL)
2766 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002767 res = PyString_AsString(result);
2768 if (PyTuple_Check(args)) {
2769 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002770 argidx = 0;
2771 }
2772 else {
2773 arglen = -1;
2774 argidx = -2;
2775 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002776 if (args->ob_type->tp_as_mapping)
2777 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002778 while (--fmtcnt >= 0) {
2779 if (*fmt != '%') {
2780 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002781 rescnt = fmtcnt + 100;
2782 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002783 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002784 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002785 res = PyString_AsString(result)
2786 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002787 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002788 }
2789 *res++ = *fmt++;
2790 }
2791 else {
2792 /* Got a format specifier */
2793 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002794 int width = -1;
2795 int prec = -1;
Guido van Rossum6938a291993-11-11 14:51:57 +00002796 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002797 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002798 PyObject *v = NULL;
2799 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002800 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002801 int sign;
2802 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002803 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002804 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002805 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002806
Guido van Rossumda9c2711996-12-05 21:58:58 +00002807 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002808 if (*fmt == '(') {
2809 char *keystart;
2810 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002811 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002812 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002813
2814 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002815 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002816 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002817 goto error;
2818 }
2819 ++fmt;
2820 --fmtcnt;
2821 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002822 /* Skip over balanced parentheses */
2823 while (pcount > 0 && --fmtcnt >= 0) {
2824 if (*fmt == ')')
2825 --pcount;
2826 else if (*fmt == '(')
2827 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002828 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002829 }
2830 keylen = fmt - keystart - 1;
2831 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002832 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002833 "incomplete format key");
2834 goto error;
2835 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002836 key = PyString_FromStringAndSize(keystart,
2837 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002838 if (key == NULL)
2839 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002840 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002841 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002842 args_owned = 0;
2843 }
2844 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002845 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002846 if (args == NULL) {
2847 goto error;
2848 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002849 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002850 arglen = -1;
2851 argidx = -2;
2852 }
Guido van Rossume5372401993-03-16 12:15:04 +00002853 while (--fmtcnt >= 0) {
2854 switch (c = *fmt++) {
2855 case '-': flags |= F_LJUST; continue;
2856 case '+': flags |= F_SIGN; continue;
2857 case ' ': flags |= F_BLANK; continue;
2858 case '#': flags |= F_ALT; continue;
2859 case '0': flags |= F_ZERO; continue;
2860 }
2861 break;
2862 }
2863 if (c == '*') {
2864 v = getnextarg(args, arglen, &argidx);
2865 if (v == NULL)
2866 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002867 if (!PyInt_Check(v)) {
2868 PyErr_SetString(PyExc_TypeError,
2869 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002870 goto error;
2871 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002872 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002873 if (width < 0) {
2874 flags |= F_LJUST;
2875 width = -width;
2876 }
Guido van Rossume5372401993-03-16 12:15:04 +00002877 if (--fmtcnt >= 0)
2878 c = *fmt++;
2879 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002880 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002881 width = c - '0';
2882 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002883 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002884 if (!isdigit(c))
2885 break;
2886 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002887 PyErr_SetString(
2888 PyExc_ValueError,
2889 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002890 goto error;
2891 }
2892 width = width*10 + (c - '0');
2893 }
2894 }
2895 if (c == '.') {
2896 prec = 0;
2897 if (--fmtcnt >= 0)
2898 c = *fmt++;
2899 if (c == '*') {
2900 v = getnextarg(args, arglen, &argidx);
2901 if (v == NULL)
2902 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002903 if (!PyInt_Check(v)) {
2904 PyErr_SetString(
2905 PyExc_TypeError,
2906 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002907 goto error;
2908 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002909 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002910 if (prec < 0)
2911 prec = 0;
2912 if (--fmtcnt >= 0)
2913 c = *fmt++;
2914 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002915 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002916 prec = c - '0';
2917 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002918 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002919 if (!isdigit(c))
2920 break;
2921 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002922 PyErr_SetString(
2923 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002924 "prec too big");
2925 goto error;
2926 }
2927 prec = prec*10 + (c - '0');
2928 }
2929 }
2930 } /* prec */
2931 if (fmtcnt >= 0) {
2932 if (c == 'h' || c == 'l' || c == 'L') {
Guido van Rossume5372401993-03-16 12:15:04 +00002933 if (--fmtcnt >= 0)
2934 c = *fmt++;
2935 }
2936 }
2937 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002938 PyErr_SetString(PyExc_ValueError,
2939 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002940 goto error;
2941 }
2942 if (c != '%') {
2943 v = getnextarg(args, arglen, &argidx);
2944 if (v == NULL)
2945 goto error;
2946 }
2947 sign = 0;
2948 fill = ' ';
2949 switch (c) {
2950 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002951 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002952 len = 1;
2953 break;
2954 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002955 case 'r':
2956 if (PyUnicode_Check(v)) {
2957 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002958 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00002959 goto unicode;
2960 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002961 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002962 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002963 else
2964 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002965 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002966 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002967 if (!PyString_Check(temp)) {
2968 PyErr_SetString(PyExc_TypeError,
2969 "%s argument has non-string str()");
2970 goto error;
2971 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002972 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002973 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002974 if (prec >= 0 && len > prec)
2975 len = prec;
2976 break;
2977 case 'i':
2978 case 'd':
2979 case 'u':
2980 case 'o':
2981 case 'x':
2982 case 'X':
2983 if (c == 'i')
2984 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00002985 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002986 temp = _PyString_FormatLong(v, flags,
2987 prec, c, &pbuf, &len);
2988 if (!temp)
2989 goto error;
2990 /* unbounded ints can always produce
2991 a sign character! */
2992 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002993 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002994 else {
2995 pbuf = formatbuf;
2996 len = formatint(pbuf, sizeof(formatbuf),
2997 flags, prec, c, v);
2998 if (len < 0)
2999 goto error;
3000 /* only d conversion is signed */
3001 sign = c == 'd';
3002 }
3003 if (flags & F_ZERO)
3004 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00003005 break;
3006 case 'e':
3007 case 'E':
3008 case 'f':
3009 case 'g':
3010 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003011 pbuf = formatbuf;
3012 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003013 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003014 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003015 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003016 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003017 fill = '0';
3018 break;
3019 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003020 pbuf = formatbuf;
3021 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003022 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003023 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003024 break;
3025 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003026 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003027 "unsupported format character '%c' (0x%x) "
3028 "at index %i",
3029 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003030 goto error;
3031 }
3032 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003033 if (*pbuf == '-' || *pbuf == '+') {
3034 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003035 len--;
3036 }
3037 else if (flags & F_SIGN)
3038 sign = '+';
3039 else if (flags & F_BLANK)
3040 sign = ' ';
3041 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003042 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003043 }
3044 if (width < len)
3045 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003046 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003047 reslen -= rescnt;
3048 rescnt = width + fmtcnt + 100;
3049 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003050 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003051 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003052 res = PyString_AsString(result)
3053 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003054 }
3055 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003056 if (fill != ' ')
3057 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003058 rescnt--;
3059 if (width > len)
3060 width--;
3061 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003062 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3063 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003064 assert(pbuf[1] == c);
3065 if (fill != ' ') {
3066 *res++ = *pbuf++;
3067 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003068 }
Tim Petersfff53252001-04-12 18:38:48 +00003069 rescnt -= 2;
3070 width -= 2;
3071 if (width < 0)
3072 width = 0;
3073 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003074 }
3075 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003076 do {
3077 --rescnt;
3078 *res++ = fill;
3079 } while (--width > len);
3080 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003081 if (fill == ' ') {
3082 if (sign)
3083 *res++ = sign;
3084 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003085 (c == 'x' || c == 'X')) {
3086 assert(pbuf[0] == '0');
3087 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003088 *res++ = *pbuf++;
3089 *res++ = *pbuf++;
3090 }
3091 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003092 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003093 res += len;
3094 rescnt -= len;
3095 while (--width >= len) {
3096 --rescnt;
3097 *res++ = ' ';
3098 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003099 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003100 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003101 "not all arguments converted");
3102 goto error;
3103 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003104 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003105 } /* '%' */
3106 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003107 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003108 PyErr_SetString(PyExc_TypeError,
3109 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003110 goto error;
3111 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003112 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003113 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003114 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003115 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003116 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003117
3118 unicode:
3119 if (args_owned) {
3120 Py_DECREF(args);
3121 args_owned = 0;
3122 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003123 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003124 if (PyTuple_Check(orig_args) && argidx > 0) {
3125 PyObject *v;
3126 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3127 v = PyTuple_New(n);
3128 if (v == NULL)
3129 goto error;
3130 while (--n >= 0) {
3131 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3132 Py_INCREF(w);
3133 PyTuple_SET_ITEM(v, n, w);
3134 }
3135 args = v;
3136 } else {
3137 Py_INCREF(orig_args);
3138 args = orig_args;
3139 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003140 args_owned = 1;
3141 /* Take what we have of the result and let the Unicode formatting
3142 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003143 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003144 if (_PyString_Resize(&result, rescnt))
3145 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003146 fmtcnt = PyString_GET_SIZE(format) - \
3147 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003148 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3149 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003150 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003151 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003152 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003153 if (v == NULL)
3154 goto error;
3155 /* Paste what we have (result) to what the Unicode formatting
3156 function returned (v) and return the result (or error) */
3157 w = PyUnicode_Concat(result, v);
3158 Py_DECREF(result);
3159 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003160 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003161 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003162
Guido van Rossume5372401993-03-16 12:15:04 +00003163 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003164 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003165 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003166 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003167 }
Guido van Rossume5372401993-03-16 12:15:04 +00003168 return NULL;
3169}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003170
3171
3172#ifdef INTERN_STRINGS
3173
Barry Warsaw4df762f2000-08-16 23:41:01 +00003174/* This dictionary will leak at PyString_Fini() time. That's acceptable
3175 * because PyString_Fini() specifically frees interned strings that are
3176 * only referenced by this dictionary. The CVS log entry for revision 2.45
3177 * says:
3178 *
3179 * Change the Fini function to only remove otherwise unreferenced
3180 * strings from the interned table. There are references in
3181 * hard-to-find static variables all over the interpreter, and it's not
3182 * worth trying to get rid of all those; but "uninterning" isn't fair
3183 * either and may cause subtle failures later -- so we have to keep them
3184 * in the interned table.
3185 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003186static PyObject *interned;
3187
3188void
Fred Drakeba096332000-07-09 07:04:36 +00003189PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003190{
3191 register PyStringObject *s = (PyStringObject *)(*p);
3192 PyObject *t;
3193 if (s == NULL || !PyString_Check(s))
3194 Py_FatalError("PyString_InternInPlace: strings only please!");
3195 if ((t = s->ob_sinterned) != NULL) {
3196 if (t == (PyObject *)s)
3197 return;
3198 Py_INCREF(t);
3199 *p = t;
3200 Py_DECREF(s);
3201 return;
3202 }
3203 if (interned == NULL) {
3204 interned = PyDict_New();
3205 if (interned == NULL)
3206 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003207 }
3208 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3209 Py_INCREF(t);
3210 *p = s->ob_sinterned = t;
3211 Py_DECREF(s);
3212 return;
3213 }
3214 t = (PyObject *)s;
3215 if (PyDict_SetItem(interned, t, t) == 0) {
3216 s->ob_sinterned = t;
3217 return;
3218 }
3219 PyErr_Clear();
3220}
3221
3222
3223PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003224PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003225{
3226 PyObject *s = PyString_FromString(cp);
3227 if (s == NULL)
3228 return NULL;
3229 PyString_InternInPlace(&s);
3230 return s;
3231}
3232
3233#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003234
3235void
Fred Drakeba096332000-07-09 07:04:36 +00003236PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003237{
3238 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003239 for (i = 0; i < UCHAR_MAX + 1; i++) {
3240 Py_XDECREF(characters[i]);
3241 characters[i] = NULL;
3242 }
3243#ifndef DONT_SHARE_SHORT_STRINGS
3244 Py_XDECREF(nullstring);
3245 nullstring = NULL;
3246#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003247#ifdef INTERN_STRINGS
3248 if (interned) {
3249 int pos, changed;
3250 PyObject *key, *value;
3251 do {
3252 changed = 0;
3253 pos = 0;
3254 while (PyDict_Next(interned, &pos, &key, &value)) {
3255 if (key->ob_refcnt == 2 && key == value) {
3256 PyDict_DelItem(interned, key);
3257 changed = 1;
3258 }
3259 }
3260 } while (changed);
3261 }
3262#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003263}
Barry Warsawa903ad982001-02-23 16:40:48 +00003264
3265#ifdef INTERN_STRINGS
3266void _Py_ReleaseInternedStrings(void)
3267{
3268 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003269 fprintf(stderr, "releasing interned strings\n");
3270 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003271 Py_DECREF(interned);
3272 interned = NULL;
3273 }
3274}
3275#endif /* INTERN_STRINGS */