blob: 87d7c1957ed119a0ebde7a02c818e9db500fa001 [file] [log] [blame]
Guido van Rossumf70e43a1991-02-19 12:39:46 +00001
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002/* String object implementation */
3
Guido van Rossumc0b618a1997-05-02 03:12:38 +00004#include "Python.h"
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00005
Guido van Rossum013142a1994-08-30 08:19:36 +00006#include <ctype.h>
7
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +00008#ifdef COUNT_ALLOCS
9int null_strings, one_strings;
10#endif
11
Fred Draked5fadf72000-09-26 05:46:01 +000012#if !defined(HAVE_LIMITS_H) && !defined(UCHAR_MAX)
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000013#define UCHAR_MAX 255
14#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000015
Guido van Rossumc0b618a1997-05-02 03:12:38 +000016static PyStringObject *characters[UCHAR_MAX + 1];
Sjoerd Mullender615194a1993-11-01 13:46:50 +000017#ifndef DONT_SHARE_SHORT_STRINGS
Guido van Rossumc0b618a1997-05-02 03:12:38 +000018static PyStringObject *nullstring;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000019#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000020
/*
   PyString_FromStringAndSize() and PyString_FromString() try in certain
   cases to share string objects. When the size of the string is zero,
   these routines always return a pointer to the same string object;
   when the size is one, they return a pointer to an already existing
   object if the contents of the string are known. For
   PyString_FromString() this is always the case; for
   PyString_FromStringAndSize() this is the case when the first argument
   is not NULL.
   The common practice of allocating a string and then filling it in or
   changing it must be done carefully. It is only allowed to change the
   contents of the string if the object was obtained from
   PyString_FromStringAndSize() with a NULL first argument, because in
   the future these routines may try to do even more sharing of objects.
*/
Guido van Rossumc0b618a1997-05-02 03:12:38 +000036PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000037PyString_FromStringAndSize(const char *str, int size)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000038{
Guido van Rossumc0b618a1997-05-02 03:12:38 +000039 register PyStringObject *op;
Sjoerd Mullender615194a1993-11-01 13:46:50 +000040#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000041 if (size == 0 && (op = nullstring) != NULL) {
42#ifdef COUNT_ALLOCS
43 null_strings++;
44#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000045 Py_INCREF(op);
46 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000047 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +000048 if (size == 1 && str != NULL &&
49 (op = characters[*str & UCHAR_MAX]) != NULL)
50 {
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000051#ifdef COUNT_ALLOCS
52 one_strings++;
53#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000054 Py_INCREF(op);
55 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000056 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000057#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +000058
59 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +000060 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +000061 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +000062 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +000063 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +000064 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000065#ifdef CACHE_HASH
66 op->ob_shash = -1;
67#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +000068#ifdef INTERN_STRINGS
69 op->ob_sinterned = NULL;
70#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +000071 if (str != NULL)
72 memcpy(op->ob_sval, str, size);
73 op->ob_sval[size] = '\0';
Sjoerd Mullender615194a1993-11-01 13:46:50 +000074#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000075 if (size == 0) {
76 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000077 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000078 } else if (size == 1 && str != NULL) {
79 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +000080 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000081 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000082#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +000083 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000084}
85
Guido van Rossumc0b618a1997-05-02 03:12:38 +000086PyObject *
Fred Drakeba096332000-07-09 07:04:36 +000087PyString_FromString(const char *str)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +000088{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000089 register size_t size = strlen(str);
Guido van Rossumc0b618a1997-05-02 03:12:38 +000090 register PyStringObject *op;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +000091 if (size > INT_MAX) {
92 PyErr_SetString(PyExc_OverflowError,
93 "string is too long for a Python string");
94 return NULL;
95 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +000096#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +000097 if (size == 0 && (op = nullstring) != NULL) {
98#ifdef COUNT_ALLOCS
99 null_strings++;
100#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000101 Py_INCREF(op);
102 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000103 }
104 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
105#ifdef COUNT_ALLOCS
106 one_strings++;
107#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000108 Py_INCREF(op);
109 return (PyObject *)op;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000110 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000111#endif /* DONT_SHARE_SHORT_STRINGS */
Guido van Rossumb18618d2000-05-03 23:44:39 +0000112
113 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000114 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000115 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000116 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000117 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000118 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000119#ifdef CACHE_HASH
120 op->ob_shash = -1;
121#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000122#ifdef INTERN_STRINGS
123 op->ob_sinterned = NULL;
124#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000125 strcpy(op->ob_sval, str);
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000126#ifndef DONT_SHARE_SHORT_STRINGS
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000127 if (size == 0) {
128 nullstring = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000129 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000130 } else if (size == 1) {
131 characters[*str & UCHAR_MAX] = op;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000132 Py_INCREF(op);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000133 }
Sjoerd Mullender615194a1993-11-01 13:46:50 +0000134#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000135 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000136}
137
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000138PyObject *PyString_Decode(const char *s,
139 int size,
140 const char *encoding,
141 const char *errors)
142{
143 PyObject *buffer = NULL, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000144
145 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000146 encoding = PyUnicode_GetDefaultEncoding();
147
148 /* Decode via the codec registry */
149 buffer = PyBuffer_FromMemory((void *)s, size);
150 if (buffer == NULL)
151 goto onError;
152 str = PyCodec_Decode(buffer, encoding, errors);
153 if (str == NULL)
154 goto onError;
155 /* Convert Unicode to a string using the default encoding */
156 if (PyUnicode_Check(str)) {
157 PyObject *temp = str;
158 str = PyUnicode_AsEncodedString(str, NULL, NULL);
159 Py_DECREF(temp);
160 if (str == NULL)
161 goto onError;
162 }
163 if (!PyString_Check(str)) {
164 PyErr_Format(PyExc_TypeError,
Andrew M. Kuchlingbd9848d2000-07-12 02:58:28 +0000165 "decoder did not return a string object (type=%.400s)",
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000166 str->ob_type->tp_name);
167 Py_DECREF(str);
168 goto onError;
169 }
170 Py_DECREF(buffer);
171 return str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000172
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000173 onError:
174 Py_XDECREF(buffer);
175 return NULL;
176}
177
178PyObject *PyString_Encode(const char *s,
179 int size,
180 const char *encoding,
181 const char *errors)
182{
183 PyObject *v, *str;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000184
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000185 str = PyString_FromStringAndSize(s, size);
186 if (str == NULL)
187 return NULL;
188 v = PyString_AsEncodedString(str, encoding, errors);
189 Py_DECREF(str);
190 return v;
191}
192
193PyObject *PyString_AsEncodedString(PyObject *str,
194 const char *encoding,
195 const char *errors)
196{
197 PyObject *v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000198
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000199 if (!PyString_Check(str)) {
200 PyErr_BadArgument();
201 goto onError;
202 }
203
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000204 if (encoding == NULL)
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000205 encoding = PyUnicode_GetDefaultEncoding();
206
207 /* Encode via the codec registry */
208 v = PyCodec_Encode(str, encoding, errors);
209 if (v == NULL)
210 goto onError;
211 /* Convert Unicode to a string using the default encoding */
212 if (PyUnicode_Check(v)) {
213 PyObject *temp = v;
214 v = PyUnicode_AsEncodedString(v, NULL, NULL);
215 Py_DECREF(temp);
216 if (v == NULL)
217 goto onError;
218 }
219 if (!PyString_Check(v)) {
220 PyErr_Format(PyExc_TypeError,
221 "encoder did not return a string object (type=%.400s)",
222 v->ob_type->tp_name);
223 Py_DECREF(v);
224 goto onError;
225 }
226 return v;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000227
Marc-André Lemburg63f3d172000-07-06 11:29:01 +0000228 onError:
229 return NULL;
230}
231
Guido van Rossum234f9421993-06-17 12:35:49 +0000232static void
Fred Drakeba096332000-07-09 07:04:36 +0000233string_dealloc(PyObject *op)
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000234{
Guido van Rossumb18618d2000-05-03 23:44:39 +0000235 PyObject_DEL(op);
Guido van Rossum719f5fa1992-03-27 17:31:02 +0000236}
237
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000238static int
239string_getsize(register PyObject *op)
240{
241 char *s;
242 int len;
243 if (PyString_AsStringAndSize(op, &s, &len))
244 return -1;
245 return len;
246}
247
248static /*const*/ char *
249string_getbuffer(register PyObject *op)
250{
251 char *s;
252 int len;
253 if (PyString_AsStringAndSize(op, &s, &len))
254 return NULL;
255 return s;
256}
257
Guido van Rossumd7047b31995-01-02 19:07:15 +0000258int
Fred Drakeba096332000-07-09 07:04:36 +0000259PyString_Size(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000260{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000261 if (!PyString_Check(op))
262 return string_getsize(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000263 return ((PyStringObject *)op) -> ob_size;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000264}
265
266/*const*/ char *
Fred Drakeba096332000-07-09 07:04:36 +0000267PyString_AsString(register PyObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000268{
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000269 if (!PyString_Check(op))
270 return string_getbuffer(op);
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000271 return ((PyStringObject *)op) -> ob_sval;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000272}
273
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000274/* Internal API needed by PyString_AsStringAndSize(): */
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000275extern
Marc-André Lemburgd1ba4432000-09-19 21:04:18 +0000276PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
277 const char *errors);
278
279int
280PyString_AsStringAndSize(register PyObject *obj,
281 register char **s,
282 register int *len)
283{
284 if (s == NULL) {
285 PyErr_BadInternalCall();
286 return -1;
287 }
288
289 if (!PyString_Check(obj)) {
290 if (PyUnicode_Check(obj)) {
291 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
292 if (obj == NULL)
293 return -1;
294 }
295 else {
296 PyErr_Format(PyExc_TypeError,
297 "expected string or Unicode object, "
298 "%.200s found", obj->ob_type->tp_name);
299 return -1;
300 }
301 }
302
303 *s = PyString_AS_STRING(obj);
304 if (len != NULL)
305 *len = PyString_GET_SIZE(obj);
306 else if ((int)strlen(*s) != PyString_GET_SIZE(obj)) {
307 PyErr_SetString(PyExc_TypeError,
308 "expected string without null bytes");
309 return -1;
310 }
311 return 0;
312}
313
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000314/* Methods */
315
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000316static int
Fred Drakeba096332000-07-09 07:04:36 +0000317string_print(PyStringObject *op, FILE *fp, int flags)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000318{
319 int i;
320 char c;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000321 int quote;
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000322 /* XXX Ought to check for interrupts when writing long strings */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000323 if (flags & Py_PRINT_RAW) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000324 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000325 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000326 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000327
Thomas Wouters7e474022000-07-16 12:04:32 +0000328 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000329 quote = '\'';
330 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
331 quote = '"';
332
333 fputc(quote, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000334 for (i = 0; i < op->ob_size; i++) {
335 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000336 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000337 fprintf(fp, "\\%c", c);
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000338 else if (c == '\t')
339 fprintf(fp, "\\t");
340 else if (c == '\n')
341 fprintf(fp, "\\n");
342 else if (c == '\r')
343 fprintf(fp, "\\r");
344 else if (c < ' ' || c >= 0x7f)
345 fprintf(fp, "\\x%02x", c & 0xff);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000346 else
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000347 fputc(c, fp);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000348 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000349 fputc(quote, fp);
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000350 return 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000351}
352
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000353static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000354string_repr(register PyStringObject *op)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000355{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +0000356 size_t newsize = 2 + 4 * op->ob_size * sizeof(char);
357 PyObject *v;
358 if (newsize > INT_MAX) {
359 PyErr_SetString(PyExc_OverflowError,
360 "string is too large to make repr");
361 }
362 v = PyString_FromStringAndSize((char *)NULL, newsize);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000363 if (v == NULL) {
Guido van Rossumbcaa31c1991-06-07 22:58:57 +0000364 return NULL;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000365 }
366 else {
367 register int i;
368 register char c;
369 register char *p;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000370 int quote;
371
Thomas Wouters7e474022000-07-16 12:04:32 +0000372 /* figure out which quote to use; single is preferred */
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000373 quote = '\'';
374 if (strchr(op->ob_sval, '\'') && !strchr(op->ob_sval, '"'))
375 quote = '"';
376
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000377 p = ((PyStringObject *)v)->ob_sval;
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000378 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000379 for (i = 0; i < op->ob_size; i++) {
380 c = op->ob_sval[i];
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000381 if (c == quote || c == '\\')
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000382 *p++ = '\\', *p++ = c;
Ka-Ping Yeefa004ad2001-01-24 17:19:08 +0000383 else if (c == '\t')
384 *p++ = '\\', *p++ = 't';
385 else if (c == '\n')
386 *p++ = '\\', *p++ = 'n';
387 else if (c == '\r')
388 *p++ = '\\', *p++ = 'r';
389 else if (c < ' ' || c >= 0x7f) {
390 sprintf(p, "\\x%02x", c & 0xff);
391 p += 4;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000392 }
393 else
394 *p++ = c;
395 }
Guido van Rossum444fc7c1993-10-26 15:25:16 +0000396 *p++ = quote;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000397 *p = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000398 _PyString_Resize(
399 &v, (int) (p - ((PyStringObject *)v)->ob_sval));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000400 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000401 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000402}
403
Guido van Rossum189f1df2001-05-01 16:51:53 +0000404static PyObject *
405string_str(PyObject *s)
406{
407 Py_INCREF(s);
408 return s;
409}
410
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000411static int
Fred Drakeba096332000-07-09 07:04:36 +0000412string_length(PyStringObject *a)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000413{
414 return a->ob_size;
415}
416
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000417static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000418string_concat(register PyStringObject *a, register PyObject *bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000419{
420 register unsigned int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000421 register PyStringObject *op;
422 if (!PyString_Check(bb)) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000423 if (PyUnicode_Check(bb))
424 return PyUnicode_Concat((PyObject *)a, bb);
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000425 PyErr_Format(PyExc_TypeError,
Fred Drakeb6a9ada2000-06-01 03:12:13 +0000426 "cannot add type \"%.200s\" to string",
427 bb->ob_type->tp_name);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000428 return NULL;
429 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000430#define b ((PyStringObject *)bb)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000431 /* Optimize cases with empty left or right operand */
432 if (a->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000433 Py_INCREF(bb);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000434 return bb;
435 }
436 if (b->ob_size == 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000437 Py_INCREF(a);
438 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000439 }
440 size = a->ob_size + b->ob_size;
Guido van Rossumb18618d2000-05-03 23:44:39 +0000441 /* PyObject_NewVar is inlined */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000442 op = (PyStringObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +0000443 PyObject_MALLOC(sizeof(PyStringObject) + size * sizeof(char));
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000444 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000445 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000446 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000447#ifdef CACHE_HASH
448 op->ob_shash = -1;
449#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000450#ifdef INTERN_STRINGS
451 op->ob_sinterned = NULL;
452#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000453 memcpy(op->ob_sval, a->ob_sval, (int) a->ob_size);
454 memcpy(op->ob_sval + a->ob_size, b->ob_sval, (int) b->ob_size);
455 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000456 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000457#undef b
458}
459
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000460static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000461string_repeat(register PyStringObject *a, register int n)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000462{
463 register int i;
Guido van Rossum2095d241997-04-09 19:41:24 +0000464 register int size;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000465 register PyStringObject *op;
Tim Peters8f422462000-09-09 06:13:41 +0000466 size_t nbytes;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000467 if (n < 0)
468 n = 0;
Tim Peters8f422462000-09-09 06:13:41 +0000469 /* watch out for overflows: the size can overflow int,
470 * and the # of bytes needed can overflow size_t
471 */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000472 size = a->ob_size * n;
Tim Peters8f422462000-09-09 06:13:41 +0000473 if (n && size / n != a->ob_size) {
474 PyErr_SetString(PyExc_OverflowError,
475 "repeated string is too long");
476 return NULL;
477 }
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000478 if (size == a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000479 Py_INCREF(a);
480 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000481 }
Tim Peters8f422462000-09-09 06:13:41 +0000482 nbytes = size * sizeof(char);
483 if (nbytes / sizeof(char) != (size_t)size ||
484 nbytes + sizeof(PyStringObject) <= nbytes) {
485 PyErr_SetString(PyExc_OverflowError,
486 "repeated string is too long");
487 return NULL;
488 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000489 op = (PyStringObject *)
Tim Peters8f422462000-09-09 06:13:41 +0000490 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000491 if (op == NULL)
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000492 return PyErr_NoMemory();
Guido van Rossumb18618d2000-05-03 23:44:39 +0000493 PyObject_INIT_VAR(op, &PyString_Type, size);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000494#ifdef CACHE_HASH
495 op->ob_shash = -1;
496#endif
Guido van Rossum2a61e741997-01-18 07:55:05 +0000497#ifdef INTERN_STRINGS
498 op->ob_sinterned = NULL;
499#endif
Guido van Rossum2a9096b1990-10-21 22:15:08 +0000500 for (i = 0; i < size; i += a->ob_size)
501 memcpy(op->ob_sval+i, a->ob_sval, (int) a->ob_size);
502 op->ob_sval[size] = '\0';
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000503 return (PyObject *) op;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000504}
505
506/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
507
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000508static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000509string_slice(register PyStringObject *a, register int i, register int j)
510 /* j -- may be negative! */
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000511{
512 if (i < 0)
513 i = 0;
514 if (j < 0)
515 j = 0; /* Avoid signed/unsigned bug in next line */
516 if (j > a->ob_size)
517 j = a->ob_size;
518 if (i == 0 && j == a->ob_size) { /* It's the same as a */
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000519 Py_INCREF(a);
520 return (PyObject *)a;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000521 }
522 if (j < i)
523 j = i;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000524 return PyString_FromStringAndSize(a->ob_sval + i, (int) (j-i));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000525}
526
Guido van Rossum9284a572000-03-07 15:53:43 +0000527static int
Fred Drakeba096332000-07-09 07:04:36 +0000528string_contains(PyObject *a, PyObject *el)
Guido van Rossum9284a572000-03-07 15:53:43 +0000529{
530 register char *s, *end;
531 register char c;
Guido van Rossum90daa872000-04-10 13:47:21 +0000532 if (PyUnicode_Check(el))
Guido van Rossum96a45ad2000-03-13 15:56:08 +0000533 return PyUnicode_Contains(a, el);
Guido van Rossum90daa872000-04-10 13:47:21 +0000534 if (!PyString_Check(el) || PyString_Size(el) != 1) {
Guido van Rossum9284a572000-03-07 15:53:43 +0000535 PyErr_SetString(PyExc_TypeError,
Andrew M. Kuchlingcb95a142000-06-09 14:04:53 +0000536 "'in <string>' requires character as left operand");
Guido van Rossum9284a572000-03-07 15:53:43 +0000537 return -1;
538 }
539 c = PyString_AsString(el)[0];
540 s = PyString_AsString(a);
541 end = s + PyString_Size(a);
542 while (s < end) {
543 if (c == *s++)
544 return 1;
545 }
546 return 0;
547}
548
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000549static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000550string_item(PyStringObject *a, register int i)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000551{
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000552 int c;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000553 PyObject *v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000554 if (i < 0 || i >= a->ob_size) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000555 PyErr_SetString(PyExc_IndexError, "string index out of range");
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000556 return NULL;
557 }
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000558 c = a->ob_sval[i] & UCHAR_MAX;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000559 v = (PyObject *) characters[c];
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000560#ifdef COUNT_ALLOCS
561 if (v != NULL)
562 one_strings++;
563#endif
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000564 if (v == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000565 v = PyString_FromStringAndSize((char *)NULL, 1);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000566 if (v == NULL)
567 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000568 characters[c] = (PyStringObject *) v;
569 ((PyStringObject *)v)->ob_sval[0] = c;
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000570 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000571 Py_INCREF(v);
Guido van Rossumdaa8bb31991-04-04 10:48:33 +0000572 return v;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000573}
574
575static int
Fred Drakeba096332000-07-09 07:04:36 +0000576string_compare(PyStringObject *a, PyStringObject *b)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000577{
Guido van Rossum253919f1991-02-13 23:18:39 +0000578 int len_a = a->ob_size, len_b = b->ob_size;
579 int min_len = (len_a < len_b) ? len_a : len_b;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000580 int cmp;
581 if (min_len > 0) {
Guido van Rossumfde7a751996-10-23 14:19:40 +0000582 cmp = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000583 if (cmp == 0)
584 cmp = memcmp(a->ob_sval, b->ob_sval, min_len);
585 if (cmp != 0)
586 return cmp;
587 }
Guido van Rossum253919f1991-02-13 23:18:39 +0000588 return (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000589}
590
Guido van Rossum9bfef441993-03-29 10:43:31 +0000591static long
Fred Drakeba096332000-07-09 07:04:36 +0000592string_hash(PyStringObject *a)
Guido van Rossum9bfef441993-03-29 10:43:31 +0000593{
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000594 register int len;
595 register unsigned char *p;
596 register long x;
597
598#ifdef CACHE_HASH
599 if (a->ob_shash != -1)
600 return a->ob_shash;
Guido van Rossum36b9f791997-02-14 16:29:22 +0000601#ifdef INTERN_STRINGS
602 if (a->ob_sinterned != NULL)
603 return (a->ob_shash =
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000604 ((PyStringObject *)(a->ob_sinterned))->ob_shash);
Guido van Rossum36b9f791997-02-14 16:29:22 +0000605#endif
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000606#endif
607 len = a->ob_size;
608 p = (unsigned char *) a->ob_sval;
609 x = *p << 7;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000610 while (--len >= 0)
Guido van Rossumeddcb3b1996-09-11 20:22:48 +0000611 x = (1000003*x) ^ *p++;
Guido van Rossum9bfef441993-03-29 10:43:31 +0000612 x ^= a->ob_size;
613 if (x == -1)
614 x = -2;
Sjoerd Mullender3bb8a051993-10-22 12:04:32 +0000615#ifdef CACHE_HASH
616 a->ob_shash = x;
617#endif
Guido van Rossum9bfef441993-03-29 10:43:31 +0000618 return x;
619}
620
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000621static int
Fred Drakeba096332000-07-09 07:04:36 +0000622string_buffer_getreadbuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000623{
624 if ( index != 0 ) {
Guido van Rossum045e6881997-09-08 18:30:11 +0000625 PyErr_SetString(PyExc_SystemError,
Guido van Rossum1db70701998-10-08 02:18:52 +0000626 "accessing non-existent string segment");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000627 return -1;
628 }
629 *ptr = (void *)self->ob_sval;
630 return self->ob_size;
631}
632
633static int
Fred Drakeba096332000-07-09 07:04:36 +0000634string_buffer_getwritebuf(PyStringObject *self, int index, const void **ptr)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000635{
Guido van Rossum045e6881997-09-08 18:30:11 +0000636 PyErr_SetString(PyExc_TypeError,
Guido van Rossum07d78001998-10-01 15:59:48 +0000637 "Cannot use string as modifiable buffer");
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000638 return -1;
639}
640
641static int
Fred Drakeba096332000-07-09 07:04:36 +0000642string_buffer_getsegcount(PyStringObject *self, int *lenp)
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000643{
644 if ( lenp )
645 *lenp = self->ob_size;
646 return 1;
647}
648
Guido van Rossum1db70701998-10-08 02:18:52 +0000649static int
Fred Drakeba096332000-07-09 07:04:36 +0000650string_buffer_getcharbuf(PyStringObject *self, int index, const char **ptr)
Guido van Rossum1db70701998-10-08 02:18:52 +0000651{
652 if ( index != 0 ) {
653 PyErr_SetString(PyExc_SystemError,
654 "accessing non-existent string segment");
655 return -1;
656 }
657 *ptr = self->ob_sval;
658 return self->ob_size;
659}
660
Guido van Rossumc0b618a1997-05-02 03:12:38 +0000661static PySequenceMethods string_as_sequence = {
Guido van Rossum013142a1994-08-30 08:19:36 +0000662 (inquiry)string_length, /*sq_length*/
663 (binaryfunc)string_concat, /*sq_concat*/
664 (intargfunc)string_repeat, /*sq_repeat*/
665 (intargfunc)string_item, /*sq_item*/
666 (intintargfunc)string_slice, /*sq_slice*/
Guido van Rossumf380e661991-06-04 19:36:32 +0000667 0, /*sq_ass_item*/
668 0, /*sq_ass_slice*/
Guido van Rossum9284a572000-03-07 15:53:43 +0000669 (objobjproc)string_contains /*sq_contains*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +0000670};
671
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000672static PyBufferProcs string_as_buffer = {
673 (getreadbufferproc)string_buffer_getreadbuf,
674 (getwritebufferproc)string_buffer_getwritebuf,
675 (getsegcountproc)string_buffer_getsegcount,
Guido van Rossum1db70701998-10-08 02:18:52 +0000676 (getcharbufferproc)string_buffer_getcharbuf,
Guido van Rossumfdf95dd1997-05-05 22:15:02 +0000677};
678
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000679
680
/* Selector values; presumably they tell the strip helpers which end(s)
   of the string to trim -- confirm against their users. */
#define LEFTSTRIP   0
#define RIGHTSTRIP  1
#define BOTHSTRIP   2
684
685
686static PyObject *
Tim Petersc2e7da92000-07-09 08:02:21 +0000687split_whitespace(const char *s, int len, int maxsplit)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000688{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000689 int i, j, err;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000690 PyObject* item;
691 PyObject *list = PyList_New(0);
692
693 if (list == NULL)
694 return NULL;
695
Guido van Rossum4c08d552000-03-10 22:55:18 +0000696 for (i = j = 0; i < len; ) {
697 while (i < len && isspace(Py_CHARMASK(s[i])))
698 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000699 j = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000700 while (i < len && !isspace(Py_CHARMASK(s[i])))
701 i++;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000702 if (j < i) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000703 if (maxsplit-- <= 0)
704 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000705 item = PyString_FromStringAndSize(s+j, (int)(i-j));
706 if (item == NULL)
707 goto finally;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000708 err = PyList_Append(list, item);
709 Py_DECREF(item);
710 if (err < 0)
711 goto finally;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000712 while (i < len && isspace(Py_CHARMASK(s[i])))
713 i++;
714 j = i;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000715 }
716 }
Guido van Rossum4c08d552000-03-10 22:55:18 +0000717 if (j < len) {
718 item = PyString_FromStringAndSize(s+j, (int)(len - j));
719 if (item == NULL)
720 goto finally;
721 err = PyList_Append(list, item);
722 Py_DECREF(item);
723 if (err < 0)
724 goto finally;
725 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000726 return list;
727 finally:
728 Py_DECREF(list);
729 return NULL;
730}
731
732
/* Docstring for the split() string method. */
static char split__doc__[] =
    "S.split([sep [,maxsplit]]) -> list of strings\n"
    "\n"
    "Return a list of the words in the string S, using sep as the\n"
    "delimiter string. If maxsplit is given, at most maxsplit\n"
    "splits are done. If sep is not specified, any whitespace string\n"
    "is a separator.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000740
741static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000742string_split(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000743{
744 int len = PyString_GET_SIZE(self), n, i, j, err;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000745 int maxsplit = -1;
746 const char *s = PyString_AS_STRING(self), *sub;
747 PyObject *list, *item, *subobj = Py_None;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000748
Guido van Rossum4c08d552000-03-10 22:55:18 +0000749 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000750 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000751 if (maxsplit < 0)
752 maxsplit = INT_MAX;
753 if (subobj == Py_None)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000754 return split_whitespace(s, len, maxsplit);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000755 if (PyString_Check(subobj)) {
756 sub = PyString_AS_STRING(subobj);
757 n = PyString_GET_SIZE(subobj);
758 }
759 else if (PyUnicode_Check(subobj))
760 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
761 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
762 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000763 if (n == 0) {
764 PyErr_SetString(PyExc_ValueError, "empty separator");
765 return NULL;
766 }
767
768 list = PyList_New(0);
769 if (list == NULL)
770 return NULL;
771
772 i = j = 0;
773 while (i+n <= len) {
Fred Drake396f6e02000-06-20 15:47:54 +0000774 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
Guido van Rossum4c08d552000-03-10 22:55:18 +0000775 if (maxsplit-- <= 0)
776 break;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000777 item = PyString_FromStringAndSize(s+j, (int)(i-j));
778 if (item == NULL)
779 goto fail;
780 err = PyList_Append(list, item);
781 Py_DECREF(item);
782 if (err < 0)
783 goto fail;
784 i = j = i + n;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000785 }
786 else
787 i++;
788 }
789 item = PyString_FromStringAndSize(s+j, (int)(len-j));
790 if (item == NULL)
791 goto fail;
792 err = PyList_Append(list, item);
793 Py_DECREF(item);
794 if (err < 0)
795 goto fail;
796
797 return list;
798
799 fail:
800 Py_DECREF(list);
801 return NULL;
802}
803
804
/* Docstring for the join() string method. */
static char join__doc__[] =
    "S.join(sequence) -> string\n"
    "\n"
    "Return a string which is the concatenation of the strings in the\n"
    "sequence. The separator between elements is S.";
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000810
811static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000812string_join(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000813{
814 char *sep = PyString_AS_STRING(self);
Tim Peters19fe14e2001-01-19 03:03:47 +0000815 const int seplen = PyString_GET_SIZE(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000816 PyObject *res = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000817 char *p;
818 int seqlen = 0;
Tim Peters19fe14e2001-01-19 03:03:47 +0000819 size_t sz = 0;
820 int i;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000821 PyObject *orig, *seq, *item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000822
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000823 if (!PyArg_ParseTuple(args, "O:join", &orig))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000824 return NULL;
825
Tim Peters19fe14e2001-01-19 03:03:47 +0000826 seq = PySequence_Fast(orig, "");
827 if (seq == NULL) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000828 if (PyErr_ExceptionMatches(PyExc_TypeError))
829 PyErr_Format(PyExc_TypeError,
830 "sequence expected, %.80s found",
831 orig->ob_type->tp_name);
832 return NULL;
833 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000834
Jeremy Hylton03657cf2000-07-12 13:05:33 +0000835 seqlen = PySequence_Size(seq);
Tim Peters19fe14e2001-01-19 03:03:47 +0000836 if (seqlen == 0) {
837 Py_DECREF(seq);
838 return PyString_FromString("");
839 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000840 if (seqlen == 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000841 item = PySequence_Fast_GET_ITEM(seq, 0);
Tim Peters19fe14e2001-01-19 03:03:47 +0000842 if (!PyString_Check(item) && !PyUnicode_Check(item)) {
843 PyErr_Format(PyExc_TypeError,
844 "sequence item 0: expected string,"
845 " %.80s found",
846 item->ob_type->tp_name);
847 Py_DECREF(seq);
848 return NULL;
849 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000850 Py_INCREF(item);
Barry Warsaw771d0672000-07-11 04:58:12 +0000851 Py_DECREF(seq);
Guido van Rossum4c08d552000-03-10 22:55:18 +0000852 return item;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000853 }
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000854
Tim Peters19fe14e2001-01-19 03:03:47 +0000855 /* There are at least two things to join. Do a pre-pass to figure out
856 * the total amount of space we'll need (sz), see whether any argument
857 * is absurd, and defer to the Unicode join if appropriate.
858 */
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000859 for (i = 0; i < seqlen; i++) {
Tim Peters19fe14e2001-01-19 03:03:47 +0000860 const size_t old_sz = sz;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000861 item = PySequence_Fast_GET_ITEM(seq, i);
862 if (!PyString_Check(item)){
863 if (PyUnicode_Check(item)) {
Tim Peters2cfe3682001-05-05 05:36:48 +0000864 /* Defer to Unicode join.
865 * CAUTION: There's no gurantee that the
866 * original sequence can be iterated over
867 * again, so we must pass seq here.
868 */
869 PyObject *result;
870 result = PyUnicode_Join((PyObject *)self, seq);
Barry Warsaw771d0672000-07-11 04:58:12 +0000871 Py_DECREF(seq);
Tim Peters2cfe3682001-05-05 05:36:48 +0000872 return result;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000873 }
874 PyErr_Format(PyExc_TypeError,
Jeremy Hylton88887aa2000-07-11 20:55:38 +0000875 "sequence item %i: expected string,"
876 " %.80s found",
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000877 i, item->ob_type->tp_name);
Tim Peters19fe14e2001-01-19 03:03:47 +0000878 Py_DECREF(seq);
879 return NULL;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000880 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000881 sz += PyString_GET_SIZE(item);
882 if (i != 0)
883 sz += seplen;
884 if (sz < old_sz || sz > INT_MAX) {
885 PyErr_SetString(PyExc_OverflowError,
886 "join() is too long for a Python string");
887 Py_DECREF(seq);
888 return NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000889 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000890 }
891
892 /* Allocate result space. */
893 res = PyString_FromStringAndSize((char*)NULL, (int)sz);
894 if (res == NULL) {
895 Py_DECREF(seq);
896 return NULL;
897 }
898
899 /* Catenate everything. */
900 p = PyString_AS_STRING(res);
901 for (i = 0; i < seqlen; ++i) {
902 size_t n;
903 item = PySequence_Fast_GET_ITEM(seq, i);
904 n = PyString_GET_SIZE(item);
905 memcpy(p, PyString_AS_STRING(item), n);
906 p += n;
907 if (i < seqlen - 1) {
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000908 memcpy(p, sep, seplen);
909 p += seplen;
Jeremy Hylton194e43e2000-07-10 21:30:28 +0000910 }
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000911 }
Tim Peters19fe14e2001-01-19 03:03:47 +0000912
Jeremy Hylton49048292000-07-11 03:28:17 +0000913 Py_DECREF(seq);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000914 return res;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000915}
916
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000917static long
Fred Drakeba096332000-07-09 07:04:36 +0000918string_find_internal(PyStringObject *self, PyObject *args, int dir)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000919{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000920 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000921 int len = PyString_GET_SIZE(self);
922 int n, i = 0, last = INT_MAX;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000923 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000924
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000925 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
Guido van Rossumc6821402000-05-08 14:08:05 +0000926 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Guido van Rossum4c08d552000-03-10 22:55:18 +0000927 return -2;
928 if (PyString_Check(subobj)) {
929 sub = PyString_AS_STRING(subobj);
930 n = PyString_GET_SIZE(subobj);
931 }
932 else if (PyUnicode_Check(subobj))
933 return PyUnicode_Find((PyObject *)self, subobj, i, last, 1);
934 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000935 return -2;
936
937 if (last > len)
938 last = len;
939 if (last < 0)
940 last += len;
941 if (last < 0)
942 last = 0;
943 if (i < 0)
944 i += len;
945 if (i < 0)
946 i = 0;
947
Guido van Rossum4c08d552000-03-10 22:55:18 +0000948 if (dir > 0) {
949 if (n == 0 && i <= last)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000950 return (long)i;
Guido van Rossum4c08d552000-03-10 22:55:18 +0000951 last -= n;
952 for (; i <= last; ++i)
Fred Drake396f6e02000-06-20 15:47:54 +0000953 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000954 return (long)i;
955 }
956 else {
957 int j;
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000958
Guido van Rossum4c08d552000-03-10 22:55:18 +0000959 if (n == 0 && i <= last)
960 return (long)last;
961 for (j = last-n; j >= i; --j)
Fred Drake396f6e02000-06-20 15:47:54 +0000962 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
Guido van Rossum4c08d552000-03-10 22:55:18 +0000963 return (long)j;
964 }
Tim Petersb3d8d1f2001-04-28 05:38:26 +0000965
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000966 return -1;
967}
968
969
/* Docstring for the find() string method.  The range is written in real
   slice notation, S[start:end]. */
static char find__doc__[] =
"S.find(sub [,start [,end]]) -> int\n\
\n\
Return the lowest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
978
979static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000980string_find(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000981{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000982 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000983 if (result == -2)
984 return NULL;
985 return PyInt_FromLong(result);
986}
987
988
/* Docstring for the index() string method. */
static char index__doc__[] =
    "S.index(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.find() but raise ValueError when the substring is not found.";
993
994static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +0000995string_index(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000996{
Guido van Rossum4c08d552000-03-10 22:55:18 +0000997 long result = string_find_internal(self, args, +1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +0000998 if (result == -2)
999 return NULL;
1000 if (result == -1) {
1001 PyErr_SetString(PyExc_ValueError,
1002 "substring not found in string.index");
1003 return NULL;
1004 }
1005 return PyInt_FromLong(result);
1006}
1007
1008
/* Docstring for the rfind() string method.  The range is written in real
   slice notation, S[start:end]. */
static char rfind__doc__[] =
"S.rfind(sub [,start [,end]]) -> int\n\
\n\
Return the highest index in S where substring sub is found,\n\
such that sub is contained within S[start:end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.";
1017
1018static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001019string_rfind(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001020{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001021 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001022 if (result == -2)
1023 return NULL;
1024 return PyInt_FromLong(result);
1025}
1026
1027
/* Docstring for the rindex() string method. */
static char rindex__doc__[] =
    "S.rindex(sub [,start [,end]]) -> int\n"
    "\n"
    "Like S.rfind() but raise ValueError when the substring is not found.";
1032
1033static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001034string_rindex(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001035{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001036 long result = string_find_internal(self, args, -1);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001037 if (result == -2)
1038 return NULL;
1039 if (result == -1) {
1040 PyErr_SetString(PyExc_ValueError,
1041 "substring not found in string.rindex");
1042 return NULL;
1043 }
1044 return PyInt_FromLong(result);
1045}
1046
1047
1048static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001049do_strip(PyStringObject *self, PyObject *args, int striptype)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001050{
1051 char *s = PyString_AS_STRING(self);
1052 int len = PyString_GET_SIZE(self), i, j;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001053
Guido van Rossum43713e52000-02-29 13:59:29 +00001054 if (!PyArg_ParseTuple(args, ":strip"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001055 return NULL;
1056
1057 i = 0;
1058 if (striptype != RIGHTSTRIP) {
1059 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1060 i++;
1061 }
1062 }
1063
1064 j = len;
1065 if (striptype != LEFTSTRIP) {
1066 do {
1067 j--;
1068 } while (j >= i && isspace(Py_CHARMASK(s[j])));
1069 j++;
1070 }
1071
1072 if (i == 0 && j == len) {
1073 Py_INCREF(self);
1074 return (PyObject*)self;
1075 }
1076 else
1077 return PyString_FromStringAndSize(s+i, j-i);
1078}
1079
1080
/* Docstring for the strip() string method. */
static char strip__doc__[] =
    "S.strip() -> string\n"
    "\n"
    "Return a copy of the string S with leading and trailing\n"
    "whitespace removed.";
1086
1087static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001088string_strip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001089{
1090 return do_strip(self, args, BOTHSTRIP);
1091}
1092
1093
/* Docstring for the lstrip() string method. */
static char lstrip__doc__[] =
    "S.lstrip() -> string\n"
    "\n"
    "Return a copy of the string S with leading whitespace removed.";
1098
1099static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001100string_lstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001101{
1102 return do_strip(self, args, LEFTSTRIP);
1103}
1104
1105
/* Docstring for the rstrip() string method. */
static char rstrip__doc__[] =
    "S.rstrip() -> string\n"
    "\n"
    "Return a copy of the string S with trailing whitespace removed.";
1110
1111static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001112string_rstrip(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001113{
1114 return do_strip(self, args, RIGHTSTRIP);
1115}
1116
1117
/* Docstring for the lower() string method. */
static char lower__doc__[] =
    "S.lower() -> string\n"
    "\n"
    "Return a copy of the string S converted to lowercase.";
1122
1123static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001124string_lower(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001125{
1126 char *s = PyString_AS_STRING(self), *s_new;
1127 int i, n = PyString_GET_SIZE(self);
1128 PyObject *new;
1129
Guido van Rossum43713e52000-02-29 13:59:29 +00001130 if (!PyArg_ParseTuple(args, ":lower"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001131 return NULL;
1132 new = PyString_FromStringAndSize(NULL, n);
1133 if (new == NULL)
1134 return NULL;
1135 s_new = PyString_AsString(new);
1136 for (i = 0; i < n; i++) {
1137 int c = Py_CHARMASK(*s++);
1138 if (isupper(c)) {
1139 *s_new = tolower(c);
1140 } else
1141 *s_new = c;
1142 s_new++;
1143 }
1144 return new;
1145}
1146
1147
/* Docstring for the upper() string method. */
static char upper__doc__[] =
    "S.upper() -> string\n"
    "\n"
    "Return a copy of the string S converted to uppercase.";
1152
1153static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001154string_upper(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001155{
1156 char *s = PyString_AS_STRING(self), *s_new;
1157 int i, n = PyString_GET_SIZE(self);
1158 PyObject *new;
1159
Guido van Rossum43713e52000-02-29 13:59:29 +00001160 if (!PyArg_ParseTuple(args, ":upper"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001161 return NULL;
1162 new = PyString_FromStringAndSize(NULL, n);
1163 if (new == NULL)
1164 return NULL;
1165 s_new = PyString_AsString(new);
1166 for (i = 0; i < n; i++) {
1167 int c = Py_CHARMASK(*s++);
1168 if (islower(c)) {
1169 *s_new = toupper(c);
1170 } else
1171 *s_new = c;
1172 s_new++;
1173 }
1174 return new;
1175}
1176
1177
/* Docstring for the title() string method. */
static char title__doc__[] =
    "S.title() -> string\n"
    "\n"
    "Return a titlecased version of S, i.e. words start with uppercase\n"
    "characters, all remaining cased characters have lowercase.";
1183
1184static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00001185string_title(PyStringObject *self, PyObject *args)
Guido van Rossum4c08d552000-03-10 22:55:18 +00001186{
1187 char *s = PyString_AS_STRING(self), *s_new;
1188 int i, n = PyString_GET_SIZE(self);
1189 int previous_is_cased = 0;
1190 PyObject *new;
1191
1192 if (!PyArg_ParseTuple(args, ":title"))
1193 return NULL;
1194 new = PyString_FromStringAndSize(NULL, n);
1195 if (new == NULL)
1196 return NULL;
1197 s_new = PyString_AsString(new);
1198 for (i = 0; i < n; i++) {
1199 int c = Py_CHARMASK(*s++);
1200 if (islower(c)) {
1201 if (!previous_is_cased)
1202 c = toupper(c);
1203 previous_is_cased = 1;
1204 } else if (isupper(c)) {
1205 if (previous_is_cased)
1206 c = tolower(c);
1207 previous_is_cased = 1;
1208 } else
1209 previous_is_cased = 0;
1210 *s_new++ = c;
1211 }
1212 return new;
1213}
1214
/* Docstring for the capitalize() string method. */
static char capitalize__doc__[] =
    "S.capitalize() -> string\n"
    "\n"
    "Return a copy of the string S with only its first character\n"
    "capitalized.";
1220
1221static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001222string_capitalize(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001223{
1224 char *s = PyString_AS_STRING(self), *s_new;
1225 int i, n = PyString_GET_SIZE(self);
1226 PyObject *new;
1227
Guido van Rossum43713e52000-02-29 13:59:29 +00001228 if (!PyArg_ParseTuple(args, ":capitalize"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001229 return NULL;
1230 new = PyString_FromStringAndSize(NULL, n);
1231 if (new == NULL)
1232 return NULL;
1233 s_new = PyString_AsString(new);
1234 if (0 < n) {
1235 int c = Py_CHARMASK(*s++);
1236 if (islower(c))
1237 *s_new = toupper(c);
1238 else
1239 *s_new = c;
1240 s_new++;
1241 }
1242 for (i = 1; i < n; i++) {
1243 int c = Py_CHARMASK(*s++);
1244 if (isupper(c))
1245 *s_new = tolower(c);
1246 else
1247 *s_new = c;
1248 s_new++;
1249 }
1250 return new;
1251}
1252
1253
/* Docstring for the count() string method. */
static char count__doc__[] =
    "S.count(sub[, start[, end]]) -> int\n"
    "\n"
    "Return the number of occurrences of substring sub in string\n"
    "S[start:end]. Optional arguments start and end are\n"
    "interpreted as in slice notation.";
1260
1261static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001262string_count(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001263{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001264 const char *s = PyString_AS_STRING(self), *sub;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001265 int len = PyString_GET_SIZE(self), n;
1266 int i = 0, last = INT_MAX;
1267 int m, r;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001268 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001269
Guido van Rossumc6821402000-05-08 14:08:05 +00001270 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
1271 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001272 return NULL;
Guido van Rossumc6821402000-05-08 14:08:05 +00001273
Guido van Rossum4c08d552000-03-10 22:55:18 +00001274 if (PyString_Check(subobj)) {
1275 sub = PyString_AS_STRING(subobj);
1276 n = PyString_GET_SIZE(subobj);
1277 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001278 else if (PyUnicode_Check(subobj)) {
1279 int count;
1280 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
1281 if (count == -1)
1282 return NULL;
1283 else
1284 return PyInt_FromLong((long) count);
1285 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001286 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1287 return NULL;
1288
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001289 if (last > len)
1290 last = len;
1291 if (last < 0)
1292 last += len;
1293 if (last < 0)
1294 last = 0;
1295 if (i < 0)
1296 i += len;
1297 if (i < 0)
1298 i = 0;
1299 m = last + 1 - n;
1300 if (n == 0)
1301 return PyInt_FromLong((long) (m-i));
1302
1303 r = 0;
1304 while (i < m) {
1305 if (!memcmp(s+i, sub, n)) {
1306 r++;
1307 i += n;
1308 } else {
1309 i++;
1310 }
1311 }
1312 return PyInt_FromLong((long) r);
1313}
1314
1315
/* Docstring for the swapcase() string method. */
static char swapcase__doc__[] =
    "S.swapcase() -> string\n"
    "\n"
    "Return a copy of the string S with uppercase characters\n"
    "converted to lowercase and vice versa.";
1321
1322static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001323string_swapcase(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001324{
1325 char *s = PyString_AS_STRING(self), *s_new;
1326 int i, n = PyString_GET_SIZE(self);
1327 PyObject *new;
1328
Guido van Rossum43713e52000-02-29 13:59:29 +00001329 if (!PyArg_ParseTuple(args, ":swapcase"))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001330 return NULL;
1331 new = PyString_FromStringAndSize(NULL, n);
1332 if (new == NULL)
1333 return NULL;
1334 s_new = PyString_AsString(new);
1335 for (i = 0; i < n; i++) {
1336 int c = Py_CHARMASK(*s++);
1337 if (islower(c)) {
1338 *s_new = toupper(c);
1339 }
1340 else if (isupper(c)) {
1341 *s_new = tolower(c);
1342 }
1343 else
1344 *s_new = c;
1345 s_new++;
1346 }
1347 return new;
1348}
1349
1350
/* Docstring for the translate() string method. */
static char translate__doc__[] =
    "S.translate(table [,deletechars]) -> string\n"
    "\n"
    "Return a copy of the string S, where all characters occurring\n"
    "in the optional argument deletechars are removed, and the\n"
    "remaining characters have been mapped through the given\n"
    "translation table, which must be a string of length 256.";
1358
1359static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001360string_translate(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001361{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001362 register char *input, *output;
1363 register const char *table;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001364 register int i, c, changed = 0;
1365 PyObject *input_obj = (PyObject*)self;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001366 const char *table1, *output_start, *del_table=NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001367 int inlen, tablen, dellen = 0;
1368 PyObject *result;
1369 int trans_table[256];
Guido van Rossum4c08d552000-03-10 22:55:18 +00001370 PyObject *tableobj, *delobj = NULL;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001371
Guido van Rossum4c08d552000-03-10 22:55:18 +00001372 if (!PyArg_ParseTuple(args, "O|O:translate",
1373 &tableobj, &delobj))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001374 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001375
1376 if (PyString_Check(tableobj)) {
1377 table1 = PyString_AS_STRING(tableobj);
1378 tablen = PyString_GET_SIZE(tableobj);
1379 }
1380 else if (PyUnicode_Check(tableobj)) {
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001381 /* Unicode .translate() does not support the deletechars
Guido van Rossum4c08d552000-03-10 22:55:18 +00001382 parameter; instead a mapping to None will cause characters
1383 to be deleted. */
1384 if (delobj != NULL) {
1385 PyErr_SetString(PyExc_TypeError,
1386 "deletions are implemented differently for unicode");
1387 return NULL;
1388 }
1389 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
1390 }
1391 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001392 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001393
1394 if (delobj != NULL) {
1395 if (PyString_Check(delobj)) {
1396 del_table = PyString_AS_STRING(delobj);
1397 dellen = PyString_GET_SIZE(delobj);
1398 }
1399 else if (PyUnicode_Check(delobj)) {
1400 PyErr_SetString(PyExc_TypeError,
1401 "deletions are implemented differently for unicode");
1402 return NULL;
1403 }
1404 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
1405 return NULL;
1406
1407 if (tablen != 256) {
1408 PyErr_SetString(PyExc_ValueError,
1409 "translation table must be 256 characters long");
1410 return NULL;
1411 }
1412 }
1413 else {
1414 del_table = NULL;
1415 dellen = 0;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001416 }
1417
1418 table = table1;
1419 inlen = PyString_Size(input_obj);
1420 result = PyString_FromStringAndSize((char *)NULL, inlen);
1421 if (result == NULL)
1422 return NULL;
1423 output_start = output = PyString_AsString(result);
1424 input = PyString_AsString(input_obj);
1425
1426 if (dellen == 0) {
1427 /* If no deletions are required, use faster code */
1428 for (i = inlen; --i >= 0; ) {
1429 c = Py_CHARMASK(*input++);
1430 if (Py_CHARMASK((*output++ = table[c])) != c)
1431 changed = 1;
1432 }
1433 if (changed)
1434 return result;
1435 Py_DECREF(result);
1436 Py_INCREF(input_obj);
1437 return input_obj;
1438 }
1439
1440 for (i = 0; i < 256; i++)
1441 trans_table[i] = Py_CHARMASK(table[i]);
1442
1443 for (i = 0; i < dellen; i++)
1444 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
1445
1446 for (i = inlen; --i >= 0; ) {
1447 c = Py_CHARMASK(*input++);
1448 if (trans_table[c] != -1)
1449 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1450 continue;
1451 changed = 1;
1452 }
1453 if (!changed) {
1454 Py_DECREF(result);
1455 Py_INCREF(input_obj);
1456 return input_obj;
1457 }
1458 /* Fix the size of the resulting string */
1459 if (inlen > 0 &&_PyString_Resize(&result, output-output_start))
1460 return NULL;
1461 return result;
1462}
1463
1464
1465/* What follows is used for implementing replace(). Perry Stoll. */
1466
/*
  mymemfind

  strstr replacement for arbitrary blocks of memory.

  Searches the LEN bytes at MEM for the first occurrence of the PAT_LEN
  bytes at PAT.  Returns the offset of the match within MEM, or -1 if
  there is none.  A pattern longer than MEM can never match, so -1 is
  returned in that case as well.
*/
static int
mymemfind(const char *mem, int len, const char *pat, int pat_len)
{
    /* A match cannot begin within the final pat_len-1 bytes. */
    const int last_start = len - pat_len;
    int pos;

    for (pos = 0; pos <= last_start; pos++) {
        /* Cheap first-byte test before the full comparison. */
        if (mem[pos] != pat[0])
            continue;
        if (memcmp(mem + pos, pat, pat_len) == 0)
            return pos;
    }
    return -1;
}
1492
/*
   mymemcnt

   Return the number of non-overlapping occurrences of PAT in MEM,
   scanning left to right with mymemfind():
       mem=1111  and pat==11 returns 2.
       mem=11111 and pat==11 also returns 2.
 */
static int
mymemcnt(const char *mem, int len, const char *pat, int pat_len)
{
    int nfound;

    for (nfound = 0; len >= 0; nfound++) {
        const int offset = mymemfind(mem, len, pat, pat_len);

        if (offset == -1)
            break;
        /* Resume right behind the match that was just counted. */
        mem += offset + pat_len;
        len -= offset + pat_len;
    }
    return nfound;
}
1516
/*
   mymemreplace

   Return a string in which occurrences of PAT in memory STR are
   replaced with SUB.  At most COUNT occurrences are replaced; a
   negative COUNT means all of them.

   If STR is empty, PAT is longer than STR, or nothing gets replaced,
   the original string is returned.  Otherwise a new buffer is
   allocated here with PyMem_MALLOC and returned; the caller must
   release it.  The buffer is not NUL-terminated unless it is empty.

   on return, out_len is:
       the length of output string, or
       -1 if the input string is returned, or
       unchanged if an error occurs (no memory, or the result would
       not fit in an int).

   return value is:
       the new string allocated locally, or
       NULL if an error occurred.
*/
static char *
mymemreplace(const char *str, int len,      /* input string */
             const char *pat, int pat_len,  /* pattern string to find */
             const char *sub, int sub_len,  /* substitution string */
             int count,                     /* number of replacements */
             int *out_len)
{
    char *out_s;
    char *new_s;
    int nfound, offset, new_len;

    if (len == 0 || pat_len > len)
        goto return_same;

    /* find length of output string */
    nfound = mymemcnt(str, len, pat, pat_len);
    if (count < 0)
        count = INT_MAX;
    else if (nfound > count)
        nfound = count;
    if (nfound == 0)
        goto return_same;

    /* Growing replacements can push the size past INT_MAX; refuse
       rather than overflow the signed computation below. */
    if (sub_len > pat_len &&
        nfound > (INT_MAX - len) / (sub_len - pat_len))
        return NULL;
    new_len = len + nfound*(sub_len - pat_len);

    if (new_len == 0) {
        /* Every byte was replaced by nothing.  A zero-size allocation
           may legitimately return NULL, which the caller would take for
           out-of-memory, so allocate one byte for it to free. */
        out_s = (char *)PyMem_MALLOC(1);
        if (out_s == NULL)
            return NULL;
        out_s[0] = '\0';
        *out_len = 0;
        return out_s;
    }

    new_s = (char *)PyMem_MALLOC(new_len);
    if (new_s == NULL)
        return NULL;

    *out_len = new_len;
    out_s = new_s;

    while (len > 0) {
        /* find index of next instance of pattern */
        offset = mymemfind(str, len, pat, pat_len);
        /* if not found, break out of loop */
        if (offset == -1)
            break;

        /* copy non matching part of input string */
        memcpy(new_s, str, offset); /* copy part of str before pat */
        str += offset + pat_len;    /* move str past pattern */
        len -= offset + pat_len;    /* reduce length of str remaining */

        /* copy substitute into the output string */
        new_s += offset;            /* move new_s to dest for sub string */
        memcpy(new_s, sub, sub_len);/* copy substring into new_s */
        new_s += sub_len;           /* offset new_s past sub string */

        /* break when we've done count replacements */
        if (--count == 0)
            break;
    }
    /* copy any remaining values into output string */
    if (len > 0)
        memcpy(new_s, str, len);
    return out_s;

  return_same:
    *out_len = -1;
    return (char*)str;  /* have to cast away constness here */
}
1594
1595
/* Docstring for the replace() string method.  The optional third argument
   limits the number of replacements, so it is documented as "count". */
static char replace__doc__[] =
"S.replace(old, new[, count]) -> string\n\
\n\
Return a copy of string S with all occurrences of substring\n\
old replaced by new. If the optional argument count is\n\
given, only the first count occurrences are replaced.";
1602
1603static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001604string_replace(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001605{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001606 const char *str = PyString_AS_STRING(self), *sub, *repl;
1607 char *new_s;
1608 int len = PyString_GET_SIZE(self), sub_len, repl_len, out_len;
1609 int count = -1;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001610 PyObject *new;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001611 PyObject *subobj, *replobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001612
Guido van Rossum4c08d552000-03-10 22:55:18 +00001613 if (!PyArg_ParseTuple(args, "OO|i:replace",
1614 &subobj, &replobj, &count))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001615 return NULL;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001616
1617 if (PyString_Check(subobj)) {
1618 sub = PyString_AS_STRING(subobj);
1619 sub_len = PyString_GET_SIZE(subobj);
1620 }
1621 else if (PyUnicode_Check(subobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001622 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001623 subobj, replobj, count);
1624 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1625 return NULL;
1626
1627 if (PyString_Check(replobj)) {
1628 repl = PyString_AS_STRING(replobj);
1629 repl_len = PyString_GET_SIZE(replobj);
1630 }
1631 else if (PyUnicode_Check(replobj))
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001632 return PyUnicode_Replace((PyObject *)self,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001633 subobj, replobj, count);
1634 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
1635 return NULL;
1636
Guido van Rossum96a45ad2000-03-13 15:56:08 +00001637 if (sub_len <= 0) {
Barry Warsaw51ac5802000-03-20 16:36:48 +00001638 PyErr_SetString(PyExc_ValueError, "empty pattern string");
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001639 return NULL;
1640 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001641 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001642 if (new_s == NULL) {
1643 PyErr_NoMemory();
1644 return NULL;
1645 }
1646 if (out_len == -1) {
1647 /* we're returning another reference to self */
1648 new = (PyObject*)self;
1649 Py_INCREF(new);
1650 }
1651 else {
1652 new = PyString_FromStringAndSize(new_s, out_len);
Guido van Rossumb18618d2000-05-03 23:44:39 +00001653 PyMem_FREE(new_s);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001654 }
1655 return new;
1656}
1657
1658
1659static char startswith__doc__[] =
1660"S.startswith(prefix[, start[, end]]) -> int\n\
1661\n\
1662Return 1 if S starts with the specified prefix, otherwise return 0. With\n\
1663optional start, test S beginning at that position. With optional end, stop\n\
1664comparing S at that position.";
1665
1666static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001667string_startswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001668{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001669 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001670 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001671 const char* prefix;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001672 int plen;
1673 int start = 0;
1674 int end = -1;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001675 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001676
Guido van Rossumc6821402000-05-08 14:08:05 +00001677 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
1678 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001679 return NULL;
1680 if (PyString_Check(subobj)) {
1681 prefix = PyString_AS_STRING(subobj);
1682 plen = PyString_GET_SIZE(subobj);
1683 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001684 else if (PyUnicode_Check(subobj)) {
1685 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001686 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001687 subobj, start, end, -1);
1688 if (rc == -1)
1689 return NULL;
1690 else
1691 return PyInt_FromLong((long) rc);
1692 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001693 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001694 return NULL;
1695
1696 /* adopt Java semantics for index out of range. it is legal for
1697 * offset to be == plen, but this only returns true if prefix is
1698 * the empty string.
1699 */
1700 if (start < 0 || start+plen > len)
1701 return PyInt_FromLong(0);
1702
1703 if (!memcmp(str+start, prefix, plen)) {
1704 /* did the match end after the specified end? */
1705 if (end < 0)
1706 return PyInt_FromLong(1);
1707 else if (end - start < plen)
1708 return PyInt_FromLong(0);
1709 else
1710 return PyInt_FromLong(1);
1711 }
1712 else return PyInt_FromLong(0);
1713}
1714
1715
1716static char endswith__doc__[] =
1717"S.endswith(suffix[, start[, end]]) -> int\n\
1718\n\
1719Return 1 if S ends with the specified suffix, otherwise return 0. With\n\
1720optional start, test S beginning at that position. With optional end, stop\n\
1721comparing S at that position.";
1722
1723static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00001724string_endswith(PyStringObject *self, PyObject *args)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001725{
Guido van Rossum4c08d552000-03-10 22:55:18 +00001726 const char* str = PyString_AS_STRING(self);
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001727 int len = PyString_GET_SIZE(self);
Guido van Rossum4c08d552000-03-10 22:55:18 +00001728 const char* suffix;
1729 int slen;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001730 int start = 0;
1731 int end = -1;
1732 int lower, upper;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001733 PyObject *subobj;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001734
Guido van Rossumc6821402000-05-08 14:08:05 +00001735 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
1736 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
Guido van Rossum4c08d552000-03-10 22:55:18 +00001737 return NULL;
1738 if (PyString_Check(subobj)) {
1739 suffix = PyString_AS_STRING(subobj);
1740 slen = PyString_GET_SIZE(subobj);
1741 }
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001742 else if (PyUnicode_Check(subobj)) {
1743 int rc;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001744 rc = PyUnicode_Tailmatch((PyObject *)self,
Marc-André Lemburg3a645e42001-01-16 11:54:12 +00001745 subobj, start, end, +1);
1746 if (rc == -1)
1747 return NULL;
1748 else
1749 return PyInt_FromLong((long) rc);
1750 }
Guido van Rossum4c08d552000-03-10 22:55:18 +00001751 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001752 return NULL;
1753
Guido van Rossum4c08d552000-03-10 22:55:18 +00001754 if (start < 0 || start > len || slen > len)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001755 return PyInt_FromLong(0);
1756
1757 upper = (end >= 0 && end <= len) ? end : len;
Guido van Rossum4c08d552000-03-10 22:55:18 +00001758 lower = (upper - slen) > start ? (upper - slen) : start;
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001759
Guido van Rossum4c08d552000-03-10 22:55:18 +00001760 if (upper-lower >= slen && !memcmp(str+lower, suffix, slen))
Barry Warsaw226ae6c1999-10-12 19:54:53 +00001761 return PyInt_FromLong(1);
1762 else return PyInt_FromLong(0);
1763}
1764
1765
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00001766static char encode__doc__[] =
1767"S.encode([encoding[,errors]]) -> string\n\
1768\n\
1769Return an encoded string version of S. Default encoding is the current\n\
1770default string encoding. errors may be given to set a different error\n\
1771handling scheme. Default is 'strict' meaning that encoding errors raise\n\
1772a ValueError. Other possible values are 'ignore' and 'replace'.";
1773
1774static PyObject *
1775string_encode(PyStringObject *self, PyObject *args)
1776{
1777 char *encoding = NULL;
1778 char *errors = NULL;
1779 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
1780 return NULL;
1781 return PyString_AsEncodedString((PyObject *)self, encoding, errors);
1782}
1783
1784
Guido van Rossum4c08d552000-03-10 22:55:18 +00001785static char expandtabs__doc__[] =
1786"S.expandtabs([tabsize]) -> string\n\
1787\n\
1788Return a copy of S where all tab characters are expanded using spaces.\n\
1789If tabsize is not given, a tab size of 8 characters is assumed.";
1790
1791static PyObject*
1792string_expandtabs(PyStringObject *self, PyObject *args)
1793{
1794 const char *e, *p;
1795 char *q;
1796 int i, j;
1797 PyObject *u;
1798 int tabsize = 8;
1799
1800 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
1801 return NULL;
1802
Thomas Wouters7e474022000-07-16 12:04:32 +00001803 /* First pass: determine size of output string */
Guido van Rossum4c08d552000-03-10 22:55:18 +00001804 i = j = 0;
1805 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
1806 for (p = PyString_AS_STRING(self); p < e; p++)
1807 if (*p == '\t') {
1808 if (tabsize > 0)
1809 j += tabsize - (j % tabsize);
1810 }
1811 else {
1812 j++;
1813 if (*p == '\n' || *p == '\r') {
1814 i += j;
1815 j = 0;
1816 }
1817 }
1818
1819 /* Second pass: create output string and fill it */
1820 u = PyString_FromStringAndSize(NULL, i + j);
1821 if (!u)
1822 return NULL;
1823
1824 j = 0;
1825 q = PyString_AS_STRING(u);
1826
1827 for (p = PyString_AS_STRING(self); p < e; p++)
1828 if (*p == '\t') {
1829 if (tabsize > 0) {
1830 i = tabsize - (j % tabsize);
1831 j += i;
1832 while (i--)
1833 *q++ = ' ';
1834 }
1835 }
1836 else {
1837 j++;
1838 *q++ = *p;
1839 if (*p == '\n' || *p == '\r')
1840 j = 0;
1841 }
1842
1843 return u;
1844}
1845
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001846static
1847PyObject *pad(PyStringObject *self,
1848 int left,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001849 int right,
1850 char fill)
1851{
1852 PyObject *u;
1853
1854 if (left < 0)
1855 left = 0;
1856 if (right < 0)
1857 right = 0;
1858
1859 if (left == 0 && right == 0) {
1860 Py_INCREF(self);
1861 return (PyObject *)self;
1862 }
1863
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001864 u = PyString_FromStringAndSize(NULL,
Guido van Rossum4c08d552000-03-10 22:55:18 +00001865 left + PyString_GET_SIZE(self) + right);
1866 if (u) {
1867 if (left)
1868 memset(PyString_AS_STRING(u), fill, left);
Tim Petersb3d8d1f2001-04-28 05:38:26 +00001869 memcpy(PyString_AS_STRING(u) + left,
1870 PyString_AS_STRING(self),
Guido van Rossum4c08d552000-03-10 22:55:18 +00001871 PyString_GET_SIZE(self));
1872 if (right)
1873 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
1874 fill, right);
1875 }
1876
1877 return u;
1878}
1879
1880static char ljust__doc__[] =
1881"S.ljust(width) -> string\n\
1882\n\
1883Return S left justified in a string of length width. Padding is\n\
1884done using spaces.";
1885
1886static PyObject *
1887string_ljust(PyStringObject *self, PyObject *args)
1888{
1889 int width;
1890 if (!PyArg_ParseTuple(args, "i:ljust", &width))
1891 return NULL;
1892
1893 if (PyString_GET_SIZE(self) >= width) {
1894 Py_INCREF(self);
1895 return (PyObject*) self;
1896 }
1897
1898 return pad(self, 0, width - PyString_GET_SIZE(self), ' ');
1899}
1900
1901
1902static char rjust__doc__[] =
1903"S.rjust(width) -> string\n\
1904\n\
1905Return S right justified in a string of length width. Padding is\n\
1906done using spaces.";
1907
1908static PyObject *
1909string_rjust(PyStringObject *self, PyObject *args)
1910{
1911 int width;
1912 if (!PyArg_ParseTuple(args, "i:rjust", &width))
1913 return NULL;
1914
1915 if (PyString_GET_SIZE(self) >= width) {
1916 Py_INCREF(self);
1917 return (PyObject*) self;
1918 }
1919
1920 return pad(self, width - PyString_GET_SIZE(self), 0, ' ');
1921}
1922
1923
1924static char center__doc__[] =
1925"S.center(width) -> string\n\
1926\n\
1927Return S centered in a string of length width. Padding is done\n\
1928using spaces.";
1929
1930static PyObject *
1931string_center(PyStringObject *self, PyObject *args)
1932{
1933 int marg, left;
1934 int width;
1935
1936 if (!PyArg_ParseTuple(args, "i:center", &width))
1937 return NULL;
1938
1939 if (PyString_GET_SIZE(self) >= width) {
1940 Py_INCREF(self);
1941 return (PyObject*) self;
1942 }
1943
1944 marg = width - PyString_GET_SIZE(self);
1945 left = marg / 2 + (marg & width & 1);
1946
1947 return pad(self, left, marg - left, ' ');
1948}
1949
#if 0
static char zfill__doc__[] =
"S.zfill(width) -> string\n\
\n\
Pad a numeric string x with zeros on the left, to fill a field\n\
of the specified width. The string x is never truncated.";

/* S.zfill(width): left-pad with '0' up to width, keeping a leading sign
   in front of the zeros.  Compiled out by the surrounding #if 0. */
static PyObject *
string_zfill(PyStringObject *self, PyObject *args)
{
	int width;
	int fill;
	char *buf;
	PyObject *padded;

	if (!PyArg_ParseTuple(args, "i:zfill", &width))
		return NULL;

	if (width <= PyString_GET_SIZE(self)) {
		Py_INCREF(self);
		return (PyObject*) self;
	}

	fill = width - PyString_GET_SIZE(self);

	padded = pad(self, fill, 0, '0');
	if (padded == NULL)
		return NULL;

	/* buf[fill] is the first character of the original string */
	buf = PyString_AS_STRING(padded);
	switch (buf[fill]) {
	case '+':
	case '-':
		/* move sign to beginning of string */
		buf[0] = buf[fill];
		buf[fill] = '0';
		break;
	default:
		break;
	}

	return padded;
}
#endif
1989
1990static char isspace__doc__[] =
1991"S.isspace() -> int\n\
1992\n\
1993Return 1 if there are only whitespace characters in S,\n\
19940 otherwise.";
1995
1996static PyObject*
1997string_isspace(PyStringObject *self, PyObject *args)
1998{
Fred Drakeba096332000-07-09 07:04:36 +00001999 register const unsigned char *p
2000 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002001 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002002
2003 if (!PyArg_NoArgs(args))
2004 return NULL;
2005
2006 /* Shortcut for single character strings */
2007 if (PyString_GET_SIZE(self) == 1 &&
2008 isspace(*p))
2009 return PyInt_FromLong(1);
2010
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002011 /* Special case for empty strings */
2012 if (PyString_GET_SIZE(self) == 0)
2013 return PyInt_FromLong(0);
2014
Guido van Rossum4c08d552000-03-10 22:55:18 +00002015 e = p + PyString_GET_SIZE(self);
2016 for (; p < e; p++) {
2017 if (!isspace(*p))
2018 return PyInt_FromLong(0);
2019 }
2020 return PyInt_FromLong(1);
2021}
2022
2023
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002024static char isalpha__doc__[] =
2025"S.isalpha() -> int\n\
2026\n\
2027Return 1 if all characters in S are alphabetic\n\
2028and there is at least one character in S, 0 otherwise.";
2029
2030static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002031string_isalpha(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002032{
Fred Drakeba096332000-07-09 07:04:36 +00002033 register const unsigned char *p
2034 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002035 register const unsigned char *e;
2036
2037 if (!PyArg_NoArgs(args))
2038 return NULL;
2039
2040 /* Shortcut for single character strings */
2041 if (PyString_GET_SIZE(self) == 1 &&
2042 isalpha(*p))
2043 return PyInt_FromLong(1);
2044
2045 /* Special case for empty strings */
2046 if (PyString_GET_SIZE(self) == 0)
2047 return PyInt_FromLong(0);
2048
2049 e = p + PyString_GET_SIZE(self);
2050 for (; p < e; p++) {
2051 if (!isalpha(*p))
2052 return PyInt_FromLong(0);
2053 }
2054 return PyInt_FromLong(1);
2055}
2056
2057
2058static char isalnum__doc__[] =
2059"S.isalnum() -> int\n\
2060\n\
2061Return 1 if all characters in S are alphanumeric\n\
2062and there is at least one character in S, 0 otherwise.";
2063
2064static PyObject*
Fred Drake49312a52000-12-06 14:27:49 +00002065string_isalnum(PyStringObject *self, PyObject *args)
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002066{
Fred Drakeba096332000-07-09 07:04:36 +00002067 register const unsigned char *p
2068 = (unsigned char *) PyString_AS_STRING(self);
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002069 register const unsigned char *e;
2070
2071 if (!PyArg_NoArgs(args))
2072 return NULL;
2073
2074 /* Shortcut for single character strings */
2075 if (PyString_GET_SIZE(self) == 1 &&
2076 isalnum(*p))
2077 return PyInt_FromLong(1);
2078
2079 /* Special case for empty strings */
2080 if (PyString_GET_SIZE(self) == 0)
2081 return PyInt_FromLong(0);
2082
2083 e = p + PyString_GET_SIZE(self);
2084 for (; p < e; p++) {
2085 if (!isalnum(*p))
2086 return PyInt_FromLong(0);
2087 }
2088 return PyInt_FromLong(1);
2089}
2090
2091
Guido van Rossum4c08d552000-03-10 22:55:18 +00002092static char isdigit__doc__[] =
2093"S.isdigit() -> int\n\
2094\n\
2095Return 1 if there are only digit characters in S,\n\
20960 otherwise.";
2097
2098static PyObject*
2099string_isdigit(PyStringObject *self, PyObject *args)
2100{
Fred Drakeba096332000-07-09 07:04:36 +00002101 register const unsigned char *p
2102 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002103 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002104
2105 if (!PyArg_NoArgs(args))
2106 return NULL;
2107
2108 /* Shortcut for single character strings */
2109 if (PyString_GET_SIZE(self) == 1 &&
2110 isdigit(*p))
2111 return PyInt_FromLong(1);
2112
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002113 /* Special case for empty strings */
2114 if (PyString_GET_SIZE(self) == 0)
2115 return PyInt_FromLong(0);
2116
Guido van Rossum4c08d552000-03-10 22:55:18 +00002117 e = p + PyString_GET_SIZE(self);
2118 for (; p < e; p++) {
2119 if (!isdigit(*p))
2120 return PyInt_FromLong(0);
2121 }
2122 return PyInt_FromLong(1);
2123}
2124
2125
2126static char islower__doc__[] =
2127"S.islower() -> int\n\
2128\n\
2129Return 1 if all cased characters in S are lowercase and there is\n\
2130at least one cased character in S, 0 otherwise.";
2131
2132static PyObject*
2133string_islower(PyStringObject *self, PyObject *args)
2134{
Fred Drakeba096332000-07-09 07:04:36 +00002135 register const unsigned char *p
2136 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002137 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002138 int cased;
2139
2140 if (!PyArg_NoArgs(args))
2141 return NULL;
2142
2143 /* Shortcut for single character strings */
2144 if (PyString_GET_SIZE(self) == 1)
2145 return PyInt_FromLong(islower(*p) != 0);
2146
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002147 /* Special case for empty strings */
2148 if (PyString_GET_SIZE(self) == 0)
2149 return PyInt_FromLong(0);
2150
Guido van Rossum4c08d552000-03-10 22:55:18 +00002151 e = p + PyString_GET_SIZE(self);
2152 cased = 0;
2153 for (; p < e; p++) {
2154 if (isupper(*p))
2155 return PyInt_FromLong(0);
2156 else if (!cased && islower(*p))
2157 cased = 1;
2158 }
2159 return PyInt_FromLong(cased);
2160}
2161
2162
2163static char isupper__doc__[] =
2164"S.isupper() -> int\n\
2165\n\
2166Return 1 if all cased characters in S are uppercase and there is\n\
2167at least one cased character in S, 0 otherwise.";
2168
2169static PyObject*
2170string_isupper(PyStringObject *self, PyObject *args)
2171{
Fred Drakeba096332000-07-09 07:04:36 +00002172 register const unsigned char *p
2173 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002174 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002175 int cased;
2176
2177 if (!PyArg_NoArgs(args))
2178 return NULL;
2179
2180 /* Shortcut for single character strings */
2181 if (PyString_GET_SIZE(self) == 1)
2182 return PyInt_FromLong(isupper(*p) != 0);
2183
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002184 /* Special case for empty strings */
2185 if (PyString_GET_SIZE(self) == 0)
2186 return PyInt_FromLong(0);
2187
Guido van Rossum4c08d552000-03-10 22:55:18 +00002188 e = p + PyString_GET_SIZE(self);
2189 cased = 0;
2190 for (; p < e; p++) {
2191 if (islower(*p))
2192 return PyInt_FromLong(0);
2193 else if (!cased && isupper(*p))
2194 cased = 1;
2195 }
2196 return PyInt_FromLong(cased);
2197}
2198
2199
2200static char istitle__doc__[] =
2201"S.istitle() -> int\n\
2202\n\
2203Return 1 if S is a titlecased string, i.e. uppercase characters\n\
2204may only follow uncased characters and lowercase characters only cased\n\
2205ones. Return 0 otherwise.";
2206
2207static PyObject*
2208string_istitle(PyStringObject *self, PyObject *args)
2209{
Fred Drakeba096332000-07-09 07:04:36 +00002210 register const unsigned char *p
2211 = (unsigned char *) PyString_AS_STRING(self);
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002212 register const unsigned char *e;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002213 int cased, previous_is_cased;
2214
2215 if (!PyArg_NoArgs(args))
2216 return NULL;
2217
2218 /* Shortcut for single character strings */
2219 if (PyString_GET_SIZE(self) == 1)
2220 return PyInt_FromLong(isupper(*p) != 0);
2221
Marc-André Lemburg60bc8092000-06-14 09:18:32 +00002222 /* Special case for empty strings */
2223 if (PyString_GET_SIZE(self) == 0)
2224 return PyInt_FromLong(0);
2225
Guido van Rossum4c08d552000-03-10 22:55:18 +00002226 e = p + PyString_GET_SIZE(self);
2227 cased = 0;
2228 previous_is_cased = 0;
2229 for (; p < e; p++) {
Guido van Rossumb8f820c2000-05-05 20:44:24 +00002230 register const unsigned char ch = *p;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002231
2232 if (isupper(ch)) {
2233 if (previous_is_cased)
2234 return PyInt_FromLong(0);
2235 previous_is_cased = 1;
2236 cased = 1;
2237 }
2238 else if (islower(ch)) {
2239 if (!previous_is_cased)
2240 return PyInt_FromLong(0);
2241 previous_is_cased = 1;
2242 cased = 1;
2243 }
2244 else
2245 previous_is_cased = 0;
2246 }
2247 return PyInt_FromLong(cased);
2248}
2249
2250
2251static char splitlines__doc__[] =
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002252"S.splitlines([keepends]]) -> list of strings\n\
Guido van Rossum4c08d552000-03-10 22:55:18 +00002253\n\
2254Return a list of the lines in S, breaking at line boundaries.\n\
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002255Line breaks are not included in the resulting list unless keepends\n\
2256is given and true.";
Guido van Rossum4c08d552000-03-10 22:55:18 +00002257
2258#define SPLIT_APPEND(data, left, right) \
2259 str = PyString_FromStringAndSize(data + left, right - left); \
2260 if (!str) \
2261 goto onError; \
2262 if (PyList_Append(list, str)) { \
2263 Py_DECREF(str); \
2264 goto onError; \
2265 } \
2266 else \
2267 Py_DECREF(str);
2268
2269static PyObject*
2270string_splitlines(PyStringObject *self, PyObject *args)
2271{
Guido van Rossum4c08d552000-03-10 22:55:18 +00002272 register int i;
2273 register int j;
2274 int len;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002275 int keepends = 0;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002276 PyObject *list;
2277 PyObject *str;
2278 char *data;
2279
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002280 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
Guido van Rossum4c08d552000-03-10 22:55:18 +00002281 return NULL;
2282
2283 data = PyString_AS_STRING(self);
2284 len = PyString_GET_SIZE(self);
2285
Guido van Rossum4c08d552000-03-10 22:55:18 +00002286 list = PyList_New(0);
2287 if (!list)
2288 goto onError;
2289
2290 for (i = j = 0; i < len; ) {
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002291 int eol;
2292
Guido van Rossum4c08d552000-03-10 22:55:18 +00002293 /* Find a line and append it */
2294 while (i < len && data[i] != '\n' && data[i] != '\r')
2295 i++;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002296
2297 /* Skip the line break reading CRLF as one line break */
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002298 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002299 if (i < len) {
2300 if (data[i] == '\r' && i + 1 < len &&
2301 data[i+1] == '\n')
2302 i += 2;
2303 else
2304 i++;
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002305 if (keepends)
2306 eol = i;
Guido van Rossum4c08d552000-03-10 22:55:18 +00002307 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002308 SPLIT_APPEND(data, j, eol);
Guido van Rossum4c08d552000-03-10 22:55:18 +00002309 j = i;
2310 }
2311 if (j < len) {
2312 SPLIT_APPEND(data, j, len);
2313 }
2314
2315 return list;
2316
2317 onError:
2318 Py_DECREF(list);
2319 return NULL;
2320}
2321
2322#undef SPLIT_APPEND
2323
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002324
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002325static PyMethodDef
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002326string_methods[] = {
Guido van Rossum4c08d552000-03-10 22:55:18 +00002327 /* Counterparts of the obsolete stropmodule functions; except
2328 string.maketrans(). */
2329 {"join", (PyCFunction)string_join, 1, join__doc__},
2330 {"split", (PyCFunction)string_split, 1, split__doc__},
2331 {"lower", (PyCFunction)string_lower, 1, lower__doc__},
2332 {"upper", (PyCFunction)string_upper, 1, upper__doc__},
2333 {"islower", (PyCFunction)string_islower, 0, islower__doc__},
2334 {"isupper", (PyCFunction)string_isupper, 0, isupper__doc__},
2335 {"isspace", (PyCFunction)string_isspace, 0, isspace__doc__},
2336 {"isdigit", (PyCFunction)string_isdigit, 0, isdigit__doc__},
2337 {"istitle", (PyCFunction)string_istitle, 0, istitle__doc__},
Marc-André Lemburg4027f8f2000-07-05 09:47:46 +00002338 {"isalpha", (PyCFunction)string_isalpha, 0, isalpha__doc__},
2339 {"isalnum", (PyCFunction)string_isalnum, 0, isalnum__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002340 {"capitalize", (PyCFunction)string_capitalize, 1, capitalize__doc__},
2341 {"count", (PyCFunction)string_count, 1, count__doc__},
2342 {"endswith", (PyCFunction)string_endswith, 1, endswith__doc__},
2343 {"find", (PyCFunction)string_find, 1, find__doc__},
2344 {"index", (PyCFunction)string_index, 1, index__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002345 {"lstrip", (PyCFunction)string_lstrip, 1, lstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002346 {"replace", (PyCFunction)string_replace, 1, replace__doc__},
2347 {"rfind", (PyCFunction)string_rfind, 1, rfind__doc__},
2348 {"rindex", (PyCFunction)string_rindex, 1, rindex__doc__},
2349 {"rstrip", (PyCFunction)string_rstrip, 1, rstrip__doc__},
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002350 {"startswith", (PyCFunction)string_startswith, 1, startswith__doc__},
2351 {"strip", (PyCFunction)string_strip, 1, strip__doc__},
2352 {"swapcase", (PyCFunction)string_swapcase, 1, swapcase__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002353 {"translate", (PyCFunction)string_translate, 1, translate__doc__},
2354 {"title", (PyCFunction)string_title, 1, title__doc__},
2355 {"ljust", (PyCFunction)string_ljust, 1, ljust__doc__},
2356 {"rjust", (PyCFunction)string_rjust, 1, rjust__doc__},
2357 {"center", (PyCFunction)string_center, 1, center__doc__},
Marc-André Lemburg63f3d172000-07-06 11:29:01 +00002358 {"encode", (PyCFunction)string_encode, 1, encode__doc__},
Guido van Rossum4c08d552000-03-10 22:55:18 +00002359 {"expandtabs", (PyCFunction)string_expandtabs, 1, expandtabs__doc__},
2360 {"splitlines", (PyCFunction)string_splitlines, 1, splitlines__doc__},
2361#if 0
2362 {"zfill", (PyCFunction)string_zfill, 1, zfill__doc__},
2363#endif
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002364 {NULL, NULL} /* sentinel */
2365};
2366
2367static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002368string_getattr(PyStringObject *s, char *name)
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002369{
2370 return Py_FindMethod(string_methods, (PyObject*)s, name);
2371}
2372
2373
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002374PyTypeObject PyString_Type = {
2375 PyObject_HEAD_INIT(&PyType_Type)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002376 0,
2377 "string",
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002378 sizeof(PyStringObject),
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002379 sizeof(char),
Guido van Rossum013142a1994-08-30 08:19:36 +00002380 (destructor)string_dealloc, /*tp_dealloc*/
2381 (printfunc)string_print, /*tp_print*/
Barry Warsaw226ae6c1999-10-12 19:54:53 +00002382 (getattrfunc)string_getattr, /*tp_getattr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002383 0, /*tp_setattr*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002384 (cmpfunc)string_compare, /*tp_compare*/
2385 (reprfunc)string_repr, /*tp_repr*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002386 0, /*tp_as_number*/
2387 &string_as_sequence, /*tp_as_sequence*/
2388 0, /*tp_as_mapping*/
Guido van Rossum013142a1994-08-30 08:19:36 +00002389 (hashfunc)string_hash, /*tp_hash*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002390 0, /*tp_call*/
Guido van Rossum189f1df2001-05-01 16:51:53 +00002391 (reprfunc)string_str, /*tp_str*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002392 0, /*tp_getattro*/
2393 0, /*tp_setattro*/
Guido van Rossumfdf95dd1997-05-05 22:15:02 +00002394 &string_as_buffer, /*tp_as_buffer*/
Guido van Rossum1db70701998-10-08 02:18:52 +00002395 Py_TPFLAGS_DEFAULT, /*tp_flags*/
Guido van Rossum2a61e741997-01-18 07:55:05 +00002396 0, /*tp_doc*/
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002397};
2398
2399void
Fred Drakeba096332000-07-09 07:04:36 +00002400PyString_Concat(register PyObject **pv, register PyObject *w)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002401{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002402 register PyObject *v;
Guido van Rossum013142a1994-08-30 08:19:36 +00002403 if (*pv == NULL)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002404 return;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002405 if (w == NULL || !PyString_Check(*pv)) {
2406 Py_DECREF(*pv);
Guido van Rossum013142a1994-08-30 08:19:36 +00002407 *pv = NULL;
2408 return;
2409 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002410 v = string_concat((PyStringObject *) *pv, w);
2411 Py_DECREF(*pv);
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002412 *pv = v;
2413}
2414
Guido van Rossum013142a1994-08-30 08:19:36 +00002415void
Fred Drakeba096332000-07-09 07:04:36 +00002416PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
Guido van Rossum013142a1994-08-30 08:19:36 +00002417{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002418 PyString_Concat(pv, w);
2419 Py_XDECREF(w);
Guido van Rossum013142a1994-08-30 08:19:36 +00002420}
2421
2422
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002423/* The following function breaks the notion that strings are immutable:
2424 it changes the size of a string. We get away with this only if there
2425 is only one module referencing the object. You can also think of it
2426 as creating a new string object and destroying the old one, only
2427 more efficiently. In any case, don't use this if the string may
2428 already be known to some other part of the code... */
2429
2430int
Fred Drakeba096332000-07-09 07:04:36 +00002431_PyString_Resize(PyObject **pv, int newsize)
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002432{
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002433 register PyObject *v;
2434 register PyStringObject *sv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002435 v = *pv;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002436 if (!PyString_Check(v) || v->ob_refcnt != 1) {
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002437 *pv = 0;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002438 Py_DECREF(v);
2439 PyErr_BadInternalCall();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002440 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002441 }
Guido van Rossum921842f1990-11-18 17:30:23 +00002442 /* XXX UNREF/NEWREF interface should be more symmetrical */
Guido van Rossum441e4ab1996-05-23 22:46:51 +00002443#ifdef Py_REF_DEBUG
Guido van Rossum6f9e4331995-03-29 16:57:48 +00002444 --_Py_RefTotal;
Guido van Rossum921842f1990-11-18 17:30:23 +00002445#endif
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002446 _Py_ForgetReference(v);
2447 *pv = (PyObject *)
Guido van Rossumb18618d2000-05-03 23:44:39 +00002448 PyObject_REALLOC((char *)v,
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002449 sizeof(PyStringObject) + newsize * sizeof(char));
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002450 if (*pv == NULL) {
Guido van Rossumb18618d2000-05-03 23:44:39 +00002451 PyObject_DEL(v);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002452 PyErr_NoMemory();
Guido van Rossum2a9096b1990-10-21 22:15:08 +00002453 return -1;
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002454 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002455 _Py_NewReference(*pv);
2456 sv = (PyStringObject *) *pv;
Guido van Rossum921842f1990-11-18 17:30:23 +00002457 sv->ob_size = newsize;
2458 sv->ob_sval[newsize] = '\0';
Guido van Rossum85a5fbb1990-10-14 12:07:46 +00002459 return 0;
2460}
Guido van Rossume5372401993-03-16 12:15:04 +00002461
2462/* Helpers for formatstring */
2463
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002464static PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002465getnextarg(PyObject *args, int arglen, int *p_argidx)
Guido van Rossume5372401993-03-16 12:15:04 +00002466{
2467 int argidx = *p_argidx;
2468 if (argidx < arglen) {
2469 (*p_argidx)++;
2470 if (arglen < 0)
2471 return args;
2472 else
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002473 return PyTuple_GetItem(args, argidx);
Guido van Rossume5372401993-03-16 12:15:04 +00002474 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002475 PyErr_SetString(PyExc_TypeError,
2476 "not enough arguments for format string");
Guido van Rossume5372401993-03-16 12:15:04 +00002477 return NULL;
2478}
2479
/* Bit flags recording which format flag characters were seen in a
 * conversion specifier.  Each flag occupies its own bit so they can be
 * OR-ed together into a single int.
 */
#define F_LJUST 0x01	/* '-' */
#define F_SIGN  0x02	/* '+' */
#define F_BLANK 0x04	/* ' ' */
#define F_ALT   0x08	/* '#' */
#define F_ZERO  0x10	/* '0' */
2492
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002493static int
Fred Drakeba096332000-07-09 07:04:36 +00002494formatfloat(char *buf, size_t buflen, int flags,
2495 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002496{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002497 /* fmt = '%#.' + `prec` + `type`
2498 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
Guido van Rossume5372401993-03-16 12:15:04 +00002499 char fmt[20];
Guido van Rossume5372401993-03-16 12:15:04 +00002500 double x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002501 if (!PyArg_Parse(v, "d;float argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002502 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002503 if (prec < 0)
2504 prec = 6;
Guido van Rossume5372401993-03-16 12:15:04 +00002505 if (type == 'f' && fabs(x)/1e25 >= 1e25)
2506 type = 'g';
2507 sprintf(fmt, "%%%s.%d%c", (flags&F_ALT) ? "#" : "", prec, type);
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002508 /* worst case length calc to ensure no buffer overrun:
2509 fmt = %#.<prec>g
2510 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002511 for any double rep.)
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002512 len = 1 + prec + 1 + 2 + 5 = 9 + prec
2513 If prec=0 the effective precision is 1 (the leading digit is
2514 always given), therefore increase by one to 10+prec. */
2515 if (buflen <= (size_t)10 + (size_t)prec) {
2516 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002517 "formatted float is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002518 return -1;
2519 }
Guido van Rossume5372401993-03-16 12:15:04 +00002520 sprintf(buf, fmt, x);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002521 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002522}
2523
Tim Peters38fd5b62000-09-21 05:43:11 +00002524/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
2525 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
2526 * Python's regular ints.
2527 * Return value: a new PyString*, or NULL if error.
2528 * . *pbuf is set to point into it,
2529 * *plen set to the # of chars following that.
2530 * Caller must decref it when done using pbuf.
2531 * The string starting at *pbuf is of the form
2532 * "-"? ("0x" | "0X")? digit+
2533 * "0x"/"0X" are present only for x and X conversions, with F_ALT
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002534 * set in flags. The case of hex digits will be correct,
Tim Peters38fd5b62000-09-21 05:43:11 +00002535 * There will be at least prec digits, zero-filled on the left if
2536 * necessary to get that many.
2537 * val object to be converted
2538 * flags bitmask of format flags; only F_ALT is looked at
2539 * prec minimum number of digits; 0-fill on left if needed
2540 * type a character in [duoxX]; u acts the same as d
2541 *
2542 * CAUTION: o, x and X conversions on regular ints can never
2543 * produce a '-' sign, but can for Python's unbounded ints.
2544 */
2545PyObject*
2546_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
2547 char **pbuf, int *plen)
2548{
2549 PyObject *result = NULL;
2550 char *buf;
2551 int i;
2552 int sign; /* 1 if '-', else 0 */
2553 int len; /* number of characters */
2554 int numdigits; /* len == numnondigits + numdigits */
2555 int numnondigits = 0;
2556
2557 switch (type) {
2558 case 'd':
2559 case 'u':
2560 result = val->ob_type->tp_str(val);
2561 break;
2562 case 'o':
2563 result = val->ob_type->tp_as_number->nb_oct(val);
2564 break;
2565 case 'x':
2566 case 'X':
2567 numnondigits = 2;
2568 result = val->ob_type->tp_as_number->nb_hex(val);
2569 break;
2570 default:
2571 assert(!"'type' not in [duoxX]");
2572 }
2573 if (!result)
2574 return NULL;
2575
2576 /* To modify the string in-place, there can only be one reference. */
2577 if (result->ob_refcnt != 1) {
2578 PyErr_BadInternalCall();
2579 return NULL;
2580 }
2581 buf = PyString_AsString(result);
2582 len = PyString_Size(result);
2583 if (buf[len-1] == 'L') {
2584 --len;
2585 buf[len] = '\0';
2586 }
2587 sign = buf[0] == '-';
2588 numnondigits += sign;
2589 numdigits = len - numnondigits;
2590 assert(numdigits > 0);
2591
Tim Petersfff53252001-04-12 18:38:48 +00002592 /* Get rid of base marker unless F_ALT */
2593 if ((flags & F_ALT) == 0) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002594 /* Need to skip 0x, 0X or 0. */
2595 int skipped = 0;
2596 switch (type) {
2597 case 'o':
2598 assert(buf[sign] == '0');
2599 /* If 0 is only digit, leave it alone. */
2600 if (numdigits > 1) {
2601 skipped = 1;
2602 --numdigits;
2603 }
2604 break;
2605 case 'x':
2606 case 'X':
2607 assert(buf[sign] == '0');
2608 assert(buf[sign + 1] == 'x');
2609 skipped = 2;
2610 numnondigits -= 2;
2611 break;
2612 }
2613 if (skipped) {
2614 buf += skipped;
2615 len -= skipped;
2616 if (sign)
2617 buf[0] = '-';
2618 }
2619 assert(len == numnondigits + numdigits);
2620 assert(numdigits > 0);
2621 }
2622
2623 /* Fill with leading zeroes to meet minimum width. */
2624 if (prec > numdigits) {
2625 PyObject *r1 = PyString_FromStringAndSize(NULL,
2626 numnondigits + prec);
2627 char *b1;
2628 if (!r1) {
2629 Py_DECREF(result);
2630 return NULL;
2631 }
2632 b1 = PyString_AS_STRING(r1);
2633 for (i = 0; i < numnondigits; ++i)
2634 *b1++ = *buf++;
2635 for (i = 0; i < prec - numdigits; i++)
2636 *b1++ = '0';
2637 for (i = 0; i < numdigits; i++)
2638 *b1++ = *buf++;
2639 *b1 = '\0';
2640 Py_DECREF(result);
2641 result = r1;
2642 buf = PyString_AS_STRING(result);
2643 len = numnondigits + prec;
2644 }
2645
2646 /* Fix up case for hex conversions. */
2647 switch (type) {
2648 case 'x':
2649 /* Need to convert all upper case letters to lower case. */
2650 for (i = 0; i < len; i++)
2651 if (buf[i] >= 'A' && buf[i] <= 'F')
2652 buf[i] += 'a'-'A';
2653 break;
2654 case 'X':
2655 /* Need to convert 0x to 0X (and -0x to -0X). */
2656 if (buf[sign + 1] == 'x')
2657 buf[sign + 1] = 'X';
2658 break;
2659 }
2660 *pbuf = buf;
2661 *plen = len;
2662 return result;
2663}
2664
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002665static int
Fred Drakeba096332000-07-09 07:04:36 +00002666formatint(char *buf, size_t buflen, int flags,
2667 int prec, int type, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002668{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002669 /* fmt = '%#.' + `prec` + 'l' + `type`
Tim Peters38fd5b62000-09-21 05:43:11 +00002670 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
2671 + 1 + 1 = 24 */
2672 char fmt[64]; /* plenty big enough! */
Guido van Rossume5372401993-03-16 12:15:04 +00002673 long x;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002674 if (!PyArg_Parse(v, "l;int argument required", &x))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002675 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002676 if (prec < 0)
2677 prec = 1;
2678 sprintf(fmt, "%%%s.%dl%c", (flags&F_ALT) ? "#" : "", prec, type);
Tim Peters38fd5b62000-09-21 05:43:11 +00002679 /* buf = '+'/'-'/'0'/'0x' + '[0-9]'*max(prec, len(x in octal))
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002680 worst case buf = '0x' + [0-9]*prec, where prec >= 11 */
Tim Peters38fd5b62000-09-21 05:43:11 +00002681 if (buflen <= 13 || buflen <= (size_t)2 + (size_t)prec) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002682 PyErr_SetString(PyExc_OverflowError,
Fred Drake661ea262000-10-24 19:57:45 +00002683 "formatted integer is too long (precision too large?)");
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002684 return -1;
2685 }
Guido van Rossume5372401993-03-16 12:15:04 +00002686 sprintf(buf, fmt, x);
Tim Petersfff53252001-04-12 18:38:48 +00002687 /* When converting 0 under %#x or %#X, C leaves off the base marker,
2688 * but we want it (for consistency with other %#x conversions, and
2689 * for consistency with Python's hex() function).
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002690 * BUG 28-Apr-2001 tim: At least two platform Cs (Metrowerks &
2691 * Compaq Tru64) violate the std by converting 0 w/ leading 0x anyway.
2692 * So add it only if the platform didn't already.
Tim Petersfff53252001-04-12 18:38:48 +00002693 */
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002694 if (x == 0 &&
2695 (flags & F_ALT) &&
2696 (type == 'x' || type == 'X') &&
2697 buf[1] != (char)type) /* this last always true under std C */
2698 {
Tim Petersfff53252001-04-12 18:38:48 +00002699 memmove(buf+2, buf, strlen(buf) + 1);
2700 buf[0] = '0';
2701 buf[1] = (char)type;
2702 }
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002703 return strlen(buf);
Guido van Rossume5372401993-03-16 12:15:04 +00002704}
2705
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002706static int
Fred Drakeba096332000-07-09 07:04:36 +00002707formatchar(char *buf, size_t buflen, PyObject *v)
Guido van Rossume5372401993-03-16 12:15:04 +00002708{
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002709 /* presume that the buffer is at least 2 characters long */
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002710 if (PyString_Check(v)) {
2711 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002712 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002713 }
2714 else {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002715 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002716 return -1;
Guido van Rossume5372401993-03-16 12:15:04 +00002717 }
2718 buf[1] = '\0';
Guido van Rossuma04d47b1997-01-21 16:12:09 +00002719 return 1;
Guido van Rossume5372401993-03-16 12:15:04 +00002720}
2721
Guido van Rossum013142a1994-08-30 08:19:36 +00002722
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. as the operand of sizeof).
*/
#define FORMATBUFLEN ((size_t)120)
Guido van Rossume5372401993-03-16 12:15:04 +00002732
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002733PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00002734PyString_Format(PyObject *format, PyObject *args)
Guido van Rossume5372401993-03-16 12:15:04 +00002735{
2736 char *fmt, *res;
2737 int fmtcnt, rescnt, reslen, arglen, argidx;
Guido van Rossum993952b1996-05-21 22:44:20 +00002738 int args_owned = 0;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00002739 PyObject *result, *orig_args, *v, *w;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002740 PyObject *dict = NULL;
2741 if (format == NULL || !PyString_Check(format) || args == NULL) {
2742 PyErr_BadInternalCall();
Guido van Rossume5372401993-03-16 12:15:04 +00002743 return NULL;
2744 }
Guido van Rossum90daa872000-04-10 13:47:21 +00002745 orig_args = args;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002746 fmt = PyString_AsString(format);
2747 fmtcnt = PyString_Size(format);
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002748 reslen = rescnt = fmtcnt + 100;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002749 result = PyString_FromStringAndSize((char *)NULL, reslen);
Guido van Rossume5372401993-03-16 12:15:04 +00002750 if (result == NULL)
2751 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002752 res = PyString_AsString(result);
2753 if (PyTuple_Check(args)) {
2754 arglen = PyTuple_Size(args);
Guido van Rossume5372401993-03-16 12:15:04 +00002755 argidx = 0;
2756 }
2757 else {
2758 arglen = -1;
2759 argidx = -2;
2760 }
Guido van Rossum013142a1994-08-30 08:19:36 +00002761 if (args->ob_type->tp_as_mapping)
2762 dict = args;
Guido van Rossume5372401993-03-16 12:15:04 +00002763 while (--fmtcnt >= 0) {
2764 if (*fmt != '%') {
2765 if (--rescnt < 0) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00002766 rescnt = fmtcnt + 100;
2767 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002768 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00002769 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002770 res = PyString_AsString(result)
2771 + reslen - rescnt;
Guido van Rossum013142a1994-08-30 08:19:36 +00002772 --rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00002773 }
2774 *res++ = *fmt++;
2775 }
2776 else {
2777 /* Got a format specifier */
2778 int flags = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00002779 int width = -1;
2780 int prec = -1;
2781 int size = 0;
Guido van Rossum6938a291993-11-11 14:51:57 +00002782 int c = '\0';
Guido van Rossume5372401993-03-16 12:15:04 +00002783 int fill;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002784 PyObject *v = NULL;
2785 PyObject *temp = NULL;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002786 char *pbuf;
Guido van Rossume5372401993-03-16 12:15:04 +00002787 int sign;
2788 int len;
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002789 char formatbuf[FORMATBUFLEN]; /* For format{float,int,char}() */
Guido van Rossum90daa872000-04-10 13:47:21 +00002790 char *fmt_start = fmt;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002791 int argidx_start = argidx;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002792
Guido van Rossumda9c2711996-12-05 21:58:58 +00002793 fmt++;
Guido van Rossum013142a1994-08-30 08:19:36 +00002794 if (*fmt == '(') {
2795 char *keystart;
2796 int keylen;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002797 PyObject *key;
Guido van Rossum045e6881997-09-08 18:30:11 +00002798 int pcount = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002799
2800 if (dict == NULL) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002801 PyErr_SetString(PyExc_TypeError,
Tim Petersb3d8d1f2001-04-28 05:38:26 +00002802 "format requires a mapping");
Guido van Rossum013142a1994-08-30 08:19:36 +00002803 goto error;
2804 }
2805 ++fmt;
2806 --fmtcnt;
2807 keystart = fmt;
Guido van Rossum045e6881997-09-08 18:30:11 +00002808 /* Skip over balanced parentheses */
2809 while (pcount > 0 && --fmtcnt >= 0) {
2810 if (*fmt == ')')
2811 --pcount;
2812 else if (*fmt == '(')
2813 ++pcount;
Guido van Rossum013142a1994-08-30 08:19:36 +00002814 fmt++;
Guido van Rossum045e6881997-09-08 18:30:11 +00002815 }
2816 keylen = fmt - keystart - 1;
2817 if (fmtcnt < 0 || pcount > 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002818 PyErr_SetString(PyExc_ValueError,
Guido van Rossum013142a1994-08-30 08:19:36 +00002819 "incomplete format key");
2820 goto error;
2821 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002822 key = PyString_FromStringAndSize(keystart,
2823 keylen);
Guido van Rossum013142a1994-08-30 08:19:36 +00002824 if (key == NULL)
2825 goto error;
Guido van Rossum993952b1996-05-21 22:44:20 +00002826 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002827 Py_DECREF(args);
Guido van Rossum993952b1996-05-21 22:44:20 +00002828 args_owned = 0;
2829 }
2830 args = PyObject_GetItem(dict, key);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002831 Py_DECREF(key);
Guido van Rossum013142a1994-08-30 08:19:36 +00002832 if (args == NULL) {
2833 goto error;
2834 }
Guido van Rossum993952b1996-05-21 22:44:20 +00002835 args_owned = 1;
Guido van Rossum013142a1994-08-30 08:19:36 +00002836 arglen = -1;
2837 argidx = -2;
2838 }
Guido van Rossume5372401993-03-16 12:15:04 +00002839 while (--fmtcnt >= 0) {
2840 switch (c = *fmt++) {
2841 case '-': flags |= F_LJUST; continue;
2842 case '+': flags |= F_SIGN; continue;
2843 case ' ': flags |= F_BLANK; continue;
2844 case '#': flags |= F_ALT; continue;
2845 case '0': flags |= F_ZERO; continue;
2846 }
2847 break;
2848 }
2849 if (c == '*') {
2850 v = getnextarg(args, arglen, &argidx);
2851 if (v == NULL)
2852 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002853 if (!PyInt_Check(v)) {
2854 PyErr_SetString(PyExc_TypeError,
2855 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002856 goto error;
2857 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002858 width = PyInt_AsLong(v);
Guido van Rossum98c9eba1999-06-07 15:12:32 +00002859 if (width < 0) {
2860 flags |= F_LJUST;
2861 width = -width;
2862 }
Guido van Rossume5372401993-03-16 12:15:04 +00002863 if (--fmtcnt >= 0)
2864 c = *fmt++;
2865 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002866 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002867 width = c - '0';
2868 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002869 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002870 if (!isdigit(c))
2871 break;
2872 if ((width*10) / 10 != width) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002873 PyErr_SetString(
2874 PyExc_ValueError,
2875 "width too big");
Guido van Rossume5372401993-03-16 12:15:04 +00002876 goto error;
2877 }
2878 width = width*10 + (c - '0');
2879 }
2880 }
2881 if (c == '.') {
2882 prec = 0;
2883 if (--fmtcnt >= 0)
2884 c = *fmt++;
2885 if (c == '*') {
2886 v = getnextarg(args, arglen, &argidx);
2887 if (v == NULL)
2888 goto error;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002889 if (!PyInt_Check(v)) {
2890 PyErr_SetString(
2891 PyExc_TypeError,
2892 "* wants int");
Guido van Rossume5372401993-03-16 12:15:04 +00002893 goto error;
2894 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002895 prec = PyInt_AsLong(v);
Guido van Rossume5372401993-03-16 12:15:04 +00002896 if (prec < 0)
2897 prec = 0;
2898 if (--fmtcnt >= 0)
2899 c = *fmt++;
2900 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002901 else if (c >= 0 && isdigit(c)) {
Guido van Rossume5372401993-03-16 12:15:04 +00002902 prec = c - '0';
2903 while (--fmtcnt >= 0) {
Guido van Rossum9fa2c111995-02-10 17:00:37 +00002904 c = Py_CHARMASK(*fmt++);
Guido van Rossume5372401993-03-16 12:15:04 +00002905 if (!isdigit(c))
2906 break;
2907 if ((prec*10) / 10 != prec) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002908 PyErr_SetString(
2909 PyExc_ValueError,
Guido van Rossume5372401993-03-16 12:15:04 +00002910 "prec too big");
2911 goto error;
2912 }
2913 prec = prec*10 + (c - '0');
2914 }
2915 }
2916 } /* prec */
2917 if (fmtcnt >= 0) {
2918 if (c == 'h' || c == 'l' || c == 'L') {
2919 size = c;
2920 if (--fmtcnt >= 0)
2921 c = *fmt++;
2922 }
2923 }
2924 if (fmtcnt < 0) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002925 PyErr_SetString(PyExc_ValueError,
2926 "incomplete format");
Guido van Rossume5372401993-03-16 12:15:04 +00002927 goto error;
2928 }
2929 if (c != '%') {
2930 v = getnextarg(args, arglen, &argidx);
2931 if (v == NULL)
2932 goto error;
2933 }
2934 sign = 0;
2935 fill = ' ';
2936 switch (c) {
2937 case '%':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002938 pbuf = "%";
Guido van Rossume5372401993-03-16 12:15:04 +00002939 len = 1;
2940 break;
2941 case 's':
Guido van Rossum90daa872000-04-10 13:47:21 +00002942 case 'r':
2943 if (PyUnicode_Check(v)) {
2944 fmt = fmt_start;
Marc-André Lemburg542fe562001-05-02 14:21:53 +00002945 argidx = argidx_start;
Guido van Rossum90daa872000-04-10 13:47:21 +00002946 goto unicode;
2947 }
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002948 if (c == 's')
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002949 temp = PyObject_Str(v);
Guido van Rossumf0b7b042000-04-11 15:39:26 +00002950 else
2951 temp = PyObject_Repr(v);
Guido van Rossum013142a1994-08-30 08:19:36 +00002952 if (temp == NULL)
Guido van Rossume5372401993-03-16 12:15:04 +00002953 goto error;
Guido van Rossum4a0144c1998-06-09 15:08:41 +00002954 if (!PyString_Check(temp)) {
2955 PyErr_SetString(PyExc_TypeError,
2956 "%s argument has non-string str()");
2957 goto error;
2958 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002959 pbuf = PyString_AsString(temp);
Guido van Rossumc0b618a1997-05-02 03:12:38 +00002960 len = PyString_Size(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00002961 if (prec >= 0 && len > prec)
2962 len = prec;
2963 break;
2964 case 'i':
2965 case 'd':
2966 case 'u':
2967 case 'o':
2968 case 'x':
2969 case 'X':
2970 if (c == 'i')
2971 c = 'd';
Tim Petersa3a3a032000-11-30 05:22:44 +00002972 if (PyLong_Check(v)) {
Tim Peters38fd5b62000-09-21 05:43:11 +00002973 temp = _PyString_FormatLong(v, flags,
2974 prec, c, &pbuf, &len);
2975 if (!temp)
2976 goto error;
2977 /* unbounded ints can always produce
2978 a sign character! */
2979 sign = 1;
Guido van Rossum4acdc231997-01-29 06:00:24 +00002980 }
Tim Peters38fd5b62000-09-21 05:43:11 +00002981 else {
2982 pbuf = formatbuf;
2983 len = formatint(pbuf, sizeof(formatbuf),
2984 flags, prec, c, v);
2985 if (len < 0)
2986 goto error;
2987 /* only d conversion is signed */
2988 sign = c == 'd';
2989 }
2990 if (flags & F_ZERO)
2991 fill = '0';
Guido van Rossume5372401993-03-16 12:15:04 +00002992 break;
2993 case 'e':
2994 case 'E':
2995 case 'f':
2996 case 'g':
2997 case 'G':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00002998 pbuf = formatbuf;
2999 len = formatfloat(pbuf, sizeof(formatbuf), flags, prec, c, v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003000 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003001 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003002 sign = 1;
Tim Peters38fd5b62000-09-21 05:43:11 +00003003 if (flags & F_ZERO)
Guido van Rossume5372401993-03-16 12:15:04 +00003004 fill = '0';
3005 break;
3006 case 'c':
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003007 pbuf = formatbuf;
3008 len = formatchar(pbuf, sizeof(formatbuf), v);
Guido van Rossuma04d47b1997-01-21 16:12:09 +00003009 if (len < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003010 goto error;
Guido van Rossume5372401993-03-16 12:15:04 +00003011 break;
3012 default:
Guido van Rossum045e6881997-09-08 18:30:11 +00003013 PyErr_Format(PyExc_ValueError,
Andrew M. Kuchling6ca89172000-12-15 13:07:46 +00003014 "unsupported format character '%c' (0x%x) "
3015 "at index %i",
3016 c, c, fmt - 1 - PyString_AsString(format));
Guido van Rossume5372401993-03-16 12:15:04 +00003017 goto error;
3018 }
3019 if (sign) {
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003020 if (*pbuf == '-' || *pbuf == '+') {
3021 sign = *pbuf++;
Guido van Rossume5372401993-03-16 12:15:04 +00003022 len--;
3023 }
3024 else if (flags & F_SIGN)
3025 sign = '+';
3026 else if (flags & F_BLANK)
3027 sign = ' ';
3028 else
Tim Peters38fd5b62000-09-21 05:43:11 +00003029 sign = 0;
Guido van Rossume5372401993-03-16 12:15:04 +00003030 }
3031 if (width < len)
3032 width = len;
Tim Peters38fd5b62000-09-21 05:43:11 +00003033 if (rescnt < width + (sign != 0)) {
Guido van Rossum6ac258d1993-05-12 08:24:20 +00003034 reslen -= rescnt;
3035 rescnt = width + fmtcnt + 100;
3036 reslen += rescnt;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003037 if (_PyString_Resize(&result, reslen) < 0)
Guido van Rossume5372401993-03-16 12:15:04 +00003038 return NULL;
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003039 res = PyString_AsString(result)
3040 + reslen - rescnt;
Guido van Rossume5372401993-03-16 12:15:04 +00003041 }
3042 if (sign) {
Guido van Rossum71e57d01993-11-11 15:03:51 +00003043 if (fill != ' ')
3044 *res++ = sign;
Guido van Rossume5372401993-03-16 12:15:04 +00003045 rescnt--;
3046 if (width > len)
3047 width--;
3048 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003049 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
3050 assert(pbuf[0] == '0');
Tim Petersfff53252001-04-12 18:38:48 +00003051 assert(pbuf[1] == c);
3052 if (fill != ' ') {
3053 *res++ = *pbuf++;
3054 *res++ = *pbuf++;
Tim Peters38fd5b62000-09-21 05:43:11 +00003055 }
Tim Petersfff53252001-04-12 18:38:48 +00003056 rescnt -= 2;
3057 width -= 2;
3058 if (width < 0)
3059 width = 0;
3060 len -= 2;
Tim Peters38fd5b62000-09-21 05:43:11 +00003061 }
3062 if (width > len && !(flags & F_LJUST)) {
Guido van Rossume5372401993-03-16 12:15:04 +00003063 do {
3064 --rescnt;
3065 *res++ = fill;
3066 } while (--width > len);
3067 }
Tim Peters38fd5b62000-09-21 05:43:11 +00003068 if (fill == ' ') {
3069 if (sign)
3070 *res++ = sign;
3071 if ((flags & F_ALT) &&
Tim Petersfff53252001-04-12 18:38:48 +00003072 (c == 'x' || c == 'X')) {
3073 assert(pbuf[0] == '0');
3074 assert(pbuf[1] == c);
Tim Peters38fd5b62000-09-21 05:43:11 +00003075 *res++ = *pbuf++;
3076 *res++ = *pbuf++;
3077 }
3078 }
Marc-André Lemburgf28dd832000-06-30 10:29:57 +00003079 memcpy(res, pbuf, len);
Guido van Rossume5372401993-03-16 12:15:04 +00003080 res += len;
3081 rescnt -= len;
3082 while (--width >= len) {
3083 --rescnt;
3084 *res++ = ' ';
3085 }
Guido van Rossum9fa2c111995-02-10 17:00:37 +00003086 if (dict && (argidx < arglen) && c != '%') {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003087 PyErr_SetString(PyExc_TypeError,
Guido van Rossum013142a1994-08-30 08:19:36 +00003088 "not all arguments converted");
3089 goto error;
3090 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003091 Py_XDECREF(temp);
Guido van Rossume5372401993-03-16 12:15:04 +00003092 } /* '%' */
3093 } /* until end */
Guido van Rossumcaeaafc1995-02-27 10:13:23 +00003094 if (argidx < arglen && !dict) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003095 PyErr_SetString(PyExc_TypeError,
3096 "not all arguments converted");
Guido van Rossume5372401993-03-16 12:15:04 +00003097 goto error;
3098 }
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003099 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003100 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003101 }
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003102 _PyString_Resize(&result, reslen - rescnt);
Guido van Rossume5372401993-03-16 12:15:04 +00003103 return result;
Guido van Rossum90daa872000-04-10 13:47:21 +00003104
3105 unicode:
3106 if (args_owned) {
3107 Py_DECREF(args);
3108 args_owned = 0;
3109 }
Marc-André Lemburg542fe562001-05-02 14:21:53 +00003110 /* Fiddle args right (remove the first argidx arguments) */
Guido van Rossum90daa872000-04-10 13:47:21 +00003111 if (PyTuple_Check(orig_args) && argidx > 0) {
3112 PyObject *v;
3113 int n = PyTuple_GET_SIZE(orig_args) - argidx;
3114 v = PyTuple_New(n);
3115 if (v == NULL)
3116 goto error;
3117 while (--n >= 0) {
3118 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
3119 Py_INCREF(w);
3120 PyTuple_SET_ITEM(v, n, w);
3121 }
3122 args = v;
3123 } else {
3124 Py_INCREF(orig_args);
3125 args = orig_args;
3126 }
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003127 args_owned = 1;
3128 /* Take what we have of the result and let the Unicode formatting
3129 function format the rest of the input. */
Guido van Rossum90daa872000-04-10 13:47:21 +00003130 rescnt = res - PyString_AS_STRING(result);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003131 if (_PyString_Resize(&result, rescnt))
3132 goto error;
Guido van Rossum90daa872000-04-10 13:47:21 +00003133 fmtcnt = PyString_GET_SIZE(format) - \
3134 (fmt - PyString_AS_STRING(format));
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003135 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
3136 if (format == NULL)
Guido van Rossum90daa872000-04-10 13:47:21 +00003137 goto error;
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003138 v = PyUnicode_Format(format, args);
Guido van Rossum90daa872000-04-10 13:47:21 +00003139 Py_DECREF(format);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003140 if (v == NULL)
3141 goto error;
3142 /* Paste what we have (result) to what the Unicode formatting
3143 function returned (v) and return the result (or error) */
3144 w = PyUnicode_Concat(result, v);
3145 Py_DECREF(result);
3146 Py_DECREF(v);
Guido van Rossum90daa872000-04-10 13:47:21 +00003147 Py_DECREF(args);
Marc-André Lemburg53f3d4a2000-10-07 08:54:09 +00003148 return w;
Tim Petersb3d8d1f2001-04-28 05:38:26 +00003149
Guido van Rossume5372401993-03-16 12:15:04 +00003150 error:
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003151 Py_DECREF(result);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003152 if (args_owned) {
Guido van Rossumc0b618a1997-05-02 03:12:38 +00003153 Py_DECREF(args);
Guido van Rossum1109fbc1998-04-10 22:16:39 +00003154 }
Guido van Rossume5372401993-03-16 12:15:04 +00003155 return NULL;
3156}
Guido van Rossum2a61e741997-01-18 07:55:05 +00003157
3158
3159#ifdef INTERN_STRINGS
3160
Barry Warsaw4df762f2000-08-16 23:41:01 +00003161/* This dictionary will leak at PyString_Fini() time. That's acceptable
3162 * because PyString_Fini() specifically frees interned strings that are
3163 * only referenced by this dictionary. The CVS log entry for revision 2.45
3164 * says:
3165 *
3166 * Change the Fini function to only remove otherwise unreferenced
3167 * strings from the interned table. There are references in
3168 * hard-to-find static variables all over the interpreter, and it's not
3169 * worth trying to get rid of all those; but "uninterning" isn't fair
3170 * either and may cause subtle failures later -- so we have to keep them
3171 * in the interned table.
3172 */
Guido van Rossum2a61e741997-01-18 07:55:05 +00003173static PyObject *interned;
3174
3175void
Fred Drakeba096332000-07-09 07:04:36 +00003176PyString_InternInPlace(PyObject **p)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003177{
3178 register PyStringObject *s = (PyStringObject *)(*p);
3179 PyObject *t;
3180 if (s == NULL || !PyString_Check(s))
3181 Py_FatalError("PyString_InternInPlace: strings only please!");
3182 if ((t = s->ob_sinterned) != NULL) {
3183 if (t == (PyObject *)s)
3184 return;
3185 Py_INCREF(t);
3186 *p = t;
3187 Py_DECREF(s);
3188 return;
3189 }
3190 if (interned == NULL) {
3191 interned = PyDict_New();
3192 if (interned == NULL)
3193 return;
Guido van Rossum2a61e741997-01-18 07:55:05 +00003194 }
3195 if ((t = PyDict_GetItem(interned, (PyObject *)s)) != NULL) {
3196 Py_INCREF(t);
3197 *p = s->ob_sinterned = t;
3198 Py_DECREF(s);
3199 return;
3200 }
3201 t = (PyObject *)s;
3202 if (PyDict_SetItem(interned, t, t) == 0) {
3203 s->ob_sinterned = t;
3204 return;
3205 }
3206 PyErr_Clear();
3207}
3208
3209
3210PyObject *
Fred Drakeba096332000-07-09 07:04:36 +00003211PyString_InternFromString(const char *cp)
Guido van Rossum2a61e741997-01-18 07:55:05 +00003212{
3213 PyObject *s = PyString_FromString(cp);
3214 if (s == NULL)
3215 return NULL;
3216 PyString_InternInPlace(&s);
3217 return s;
3218}
3219
3220#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003221
3222void
Fred Drakeba096332000-07-09 07:04:36 +00003223PyString_Fini(void)
Guido van Rossum8cf04761997-08-02 02:57:45 +00003224{
3225 int i;
Guido van Rossum8cf04761997-08-02 02:57:45 +00003226 for (i = 0; i < UCHAR_MAX + 1; i++) {
3227 Py_XDECREF(characters[i]);
3228 characters[i] = NULL;
3229 }
3230#ifndef DONT_SHARE_SHORT_STRINGS
3231 Py_XDECREF(nullstring);
3232 nullstring = NULL;
3233#endif
Guido van Rossum971a7aa1997-08-05 02:15:12 +00003234#ifdef INTERN_STRINGS
3235 if (interned) {
3236 int pos, changed;
3237 PyObject *key, *value;
3238 do {
3239 changed = 0;
3240 pos = 0;
3241 while (PyDict_Next(interned, &pos, &key, &value)) {
3242 if (key->ob_refcnt == 2 && key == value) {
3243 PyDict_DelItem(interned, key);
3244 changed = 1;
3245 }
3246 }
3247 } while (changed);
3248 }
3249#endif
Guido van Rossum8cf04761997-08-02 02:57:45 +00003250}
Barry Warsawa903ad982001-02-23 16:40:48 +00003251
3252#ifdef INTERN_STRINGS
3253void _Py_ReleaseInternedStrings(void)
3254{
3255 if (interned) {
Guido van Rossum59d1d2b2001-04-20 19:13:02 +00003256 fprintf(stderr, "releasing interned strings\n");
3257 PyDict_Clear(interned);
Barry Warsawa903ad982001-02-23 16:40:48 +00003258 Py_DECREF(interned);
3259 interned = NULL;
3260 }
3261}
3262#endif /* INTERN_STRINGS */